aiqa-client 0.4.3__tar.gz → 0.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {aiqa_client-0.4.3/aiqa_client.egg-info → aiqa_client-0.4.7}/PKG-INFO +1 -1
  2. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/aiqa_exporter.py +192 -99
  3. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/client.py +13 -10
  4. aiqa_client-0.4.7/aiqa/constants.py +8 -0
  5. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/experiment_runner.py +12 -29
  6. aiqa_client-0.4.7/aiqa/http_utils.py +69 -0
  7. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/object_serialiser.py +136 -115
  8. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/tracing.py +113 -253
  9. aiqa_client-0.4.7/aiqa/tracing_llm_utils.py +191 -0
  10. {aiqa_client-0.4.3 → aiqa_client-0.4.7/aiqa_client.egg-info}/PKG-INFO +1 -1
  11. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa_client.egg-info/SOURCES.txt +6 -4
  12. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/pyproject.toml +1 -1
  13. aiqa_client-0.4.7/tests/test_object_serialiser.py +415 -0
  14. aiqa_client-0.4.3/aiqa/constants.py +0 -6
  15. aiqa_client-0.4.3/aiqa/test_experiment_runner.py +0 -176
  16. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/LICENSE.txt +0 -0
  17. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/MANIFEST.in +0 -0
  18. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/README.md +0 -0
  19. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/__init__.py +0 -0
  20. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa/py.typed +0 -0
  21. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa_client.egg-info/dependency_links.txt +0 -0
  22. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa_client.egg-info/requires.txt +0 -0
  23. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/aiqa_client.egg-info/top_level.txt +0 -0
  24. {aiqa_client-0.4.3 → aiqa_client-0.4.7}/setup.cfg +0 -0
  25. {aiqa_client-0.4.3/aiqa → aiqa_client-0.4.7/tests}/test_startup_reliability.py +0 -0
  26. {aiqa_client-0.4.3/aiqa → aiqa_client-0.4.7/tests}/test_tracing.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aiqa-client
- Version: 0.4.3
+ Version: 0.4.7
  Summary: OpenTelemetry-based Python client for tracing functions and sending traces to the AIQA server
  Author-email: AIQA <info@aiqa.dev>
  License: MIT
@@ -14,9 +14,11 @@ from typing import List, Dict, Any, Optional
  from opentelemetry.sdk.trace import ReadableSpan
  from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult

- from .constants import AIQA_TRACER_NAME, VERSION
+ from .constants import AIQA_TRACER_NAME, VERSION, LOG_TAG
+ from .http_utils import get_server_url, get_api_key, build_headers
+ from .object_serialiser import toNumber

- logger = logging.getLogger("AIQA")
+ logger = logging.getLogger(LOG_TAG)


  class AIQASpanExporter(SpanExporter):
@@ -31,7 +33,8 @@ class AIQASpanExporter(SpanExporter):
  api_key: Optional[str] = None,
  flush_interval_seconds: float = 5.0,
  max_batch_size_bytes: int = 5 * 1024 * 1024, # 5MB default
- max_buffer_spans: int = 10000, # Maximum spans to buffer (prevents unbounded growth)
+ max_buffer_spans: Optional[int] = None, # Maximum spans to buffer (prevents unbounded growth)
+ max_buffer_size_bytes: Optional[int] = None, # Maximum buffer size in bytes (prevents unbounded memory growth)
  startup_delay_seconds: Optional[float] = None,
  ):
  """
@@ -42,15 +45,27 @@ class AIQASpanExporter(SpanExporter):
  api_key: API key for authentication (defaults to AIQA_API_KEY env var)
  flush_interval_seconds: How often to flush spans to the server
  max_batch_size_bytes: Maximum size of a single batch in bytes (default: 5mb)
- max_buffer_spans: Maximum spans to buffer (prevents unbounded growth)
+ max_buffer_spans: Maximum spans to buffer (prevents unbounded growth).
+ Defaults to 10000, or AIQA_MAX_BUFFER_SPANS env var if set.
+ max_buffer_size_bytes: Maximum total buffer size in bytes (prevents unbounded memory growth).
+ Defaults to None (no limit), or AIQA_MAX_BUFFER_SIZE_BYTES env var if set.
  startup_delay_seconds: Delay before starting auto-flush (default: 10s, or AIQA_STARTUP_DELAY_SECONDS env var)
  """
- self._server_url = server_url
- self._api_key = api_key
+ self._server_url = get_server_url(server_url)
+ self._api_key = get_api_key(api_key)
  self.flush_interval_ms = flush_interval_seconds * 1000
  self.max_batch_size_bytes = max_batch_size_bytes
+
+ # Get max_buffer_spans from parameter, environment variable, or default
+ if not max_buffer_spans:
+ max_buffer_spans = toNumber(os.getenv("AIQA_MAX_BUFFER_SPANS")) or 10000
  self.max_buffer_spans = max_buffer_spans

+ # Get max_buffer_size_bytes from parameter, environment variable, or default
+ if not max_buffer_size_bytes:
+ max_buffer_size_bytes = toNumber(os.getenv("AIQA_MAX_BUFFER_SIZE_BYTES")) or toNumber("100m")
+ self.max_buffer_size_bytes = max_buffer_size_bytes
+
  # Get startup delay from parameter or environment variable (default: 10s)
  if startup_delay_seconds is None:
  env_delay = os.getenv("AIQA_STARTUP_DELAY_SECONDS")
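
Note: both new buffer limits can come from constructor arguments or from environment variables, and the literal default toNumber("100m") implies that toNumber accepts suffixed size strings (e.g. "100m" for roughly 100 MB); its exact grammar lives in object_serialiser.py, which this diff lists but does not show. A minimal usage sketch under those assumptions (URL and key are placeholders):

    import os
    os.environ["AIQA_MAX_BUFFER_SPANS"] = "20000"      # span-count cap
    os.environ["AIQA_MAX_BUFFER_SIZE_BYTES"] = "200m"  # byte cap, assuming suffix parsing

    from aiqa.aiqa_exporter import AIQASpanExporter
    exporter = AIQASpanExporter(
        server_url="https://aiqa.example.com",  # placeholder
        api_key="my-api-key",                   # placeholder
    )

Because the fallback is written as "if not max_buffer_spans:", passing 0 explicitly behaves like passing None and falls through to the env var or the 10000 default.
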
@@ -66,6 +81,11 @@ class AIQASpanExporter(SpanExporter):

  self.buffer: List[Dict[str, Any]] = []
  self.buffer_span_keys: set = set() # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
+ self.buffer_size_bytes: int = 0 # Track total size of buffered spans in bytes
+ # Cache span sizes to avoid recalculation (maps span_key -> size_bytes)
+ # Limited to max_buffer_spans * 2 to prevent unbounded growth
+ self._span_size_cache: Dict[tuple, int] = {}
+ self._max_cache_size = self.max_buffer_spans * 2 # Allow cache to be 2x buffer size
  self.buffer_lock = threading.Lock()
  self.flush_lock = threading.Lock()
  # shutdown_requested is only set once (in shutdown()) and read many times
@@ -75,27 +95,19 @@ class AIQASpanExporter(SpanExporter):
  self._auto_flush_started = False
  self._auto_flush_lock = threading.Lock() # Lock for lazy thread creation

- logger.info(
- f"Initializing AIQASpanExporter: server_url={self.server_url or 'not set'}, "
+ logger.info(f"Initializing AIQASpanExporter: server_url={self._server_url or 'not set'}, "
  f"flush_interval={flush_interval_seconds}s, startup_delay={startup_delay_seconds}s"
  )
  # Don't start thread immediately - start lazily on first export to avoid startup issues

- @property
- def server_url(self) -> str:
- return self._server_url or os.getenv("AIQA_SERVER_URL", "").rstrip("/")
-
- @property
- def api_key(self) -> str:
- return self._api_key or os.getenv("AIQA_API_KEY", "")
-
  def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
  """
  Export spans to the AIQA server. Adds spans to buffer for async flushing.
  Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
+ Actual send is done by flush -> _send_spans, or shutdown -> _send_spans_sync
  """
  if not spans:
- logger.debug("export() called with empty spans list")
+ logger.debug(f"export: called with empty spans list")
  return SpanExportResult.SUCCESS

  # Check if AIQA tracing is enabled
@@ -103,13 +115,13 @@ class AIQASpanExporter(SpanExporter):
  from .client import get_aiqa_client
  client = get_aiqa_client()
  if not client.enabled:
- logger.debug(f"AIQA export() skipped: tracing is disabled")
+ logger.debug(f"AIQA export: skipped: tracing is disabled")
  return SpanExportResult.SUCCESS
  except Exception:
  # If we can't check enabled status, proceed (fail open)
  pass

- logger.debug(f"AIQA export() called with {len(spans)} spans")
+ logger.debug(f"AIQA export() to buffer called with {len(spans)} spans")

  # Lazy initialization: start auto-flush thread on first export
  # This avoids thread creation during initialization, which can cause issues in ECS deployments
@@ -118,40 +130,74 @@ class AIQASpanExporter(SpanExporter):
  # Serialize and add to buffer, deduplicating by (traceId, spanId)
  with self.buffer_lock:
  serialized_spans = []
+ serialized_sizes = [] # Track sizes of serialized spans
  duplicates_count = 0
  dropped_count = 0
+ dropped_memory_count = 0
+ flush_in_progress = self.flush_lock.locked()
+
  for span in spans:
- # Check if buffer is full (prevent unbounded growth)
+ # Check if buffer is full by span count (prevent unbounded growth)
  if len(self.buffer) >= self.max_buffer_spans:
- dropped_count += 1
- continue
+ if flush_in_progress:
+ # Flush in progress, drop this span
+ dropped_count += 1
+ continue
+ # Flush not in progress, will trigger flush after adding spans
+ # Continue processing remaining spans to add them before flush

  serialized = self._serialize_span(span)
  span_key = (serialized["traceId"], serialized["spanId"])
  if span_key not in self.buffer_span_keys:
+ # Estimate size of this span when serialized (cache for later use)
+ span_size = self._get_span_size(span_key, serialized)
+
+ # Check if buffer is full by memory size (prevent unbounded memory growth)
+ if self.max_buffer_size_bytes is not None and self.buffer_size_bytes + span_size > self.max_buffer_size_bytes:
+ if flush_in_progress:
+ # Flush in progress, drop this span
+ # Don't cache size for dropped spans to prevent memory leak
+ dropped_memory_count += 1
+ continue
+ # Flush not in progress, will trigger flush after adding spans
+ # Continue processing remaining spans to add them before flush
+
  serialized_spans.append(serialized)
+ serialized_sizes.append(span_size)
  self.buffer_span_keys.add(span_key)
  else:
  duplicates_count += 1
- logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
+ logger.debug(f"export: skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")

+ # Add spans and update buffer size
  self.buffer.extend(serialized_spans)
+ self.buffer_size_bytes += sum(serialized_sizes)
  buffer_size = len(self.buffer)

+ # Check if thresholds are reached after adding spans
+ threshold_reached = self._check_thresholds_reached()
+
  if dropped_count > 0:
- logger.warning(
- f"WARNING: Buffer full ({buffer_size} spans), dropped {dropped_count} span(s). "
+ logger.warning(f"WARNING: Buffer full ({buffer_size} spans), dropped {dropped_count} span(s) (flush in progress). "
  f"Consider increasing max_buffer_spans or fixing server connectivity."
  )
+ if dropped_memory_count > 0:
+ logger.warning(f"WARNING: Buffer memory limit reached ({self.buffer_size_bytes} bytes / {self.max_buffer_size_bytes} bytes), "
+ f"dropped {dropped_memory_count} span(s) (flush in progress). "
+ f"Consider increasing AIQA_MAX_BUFFER_SIZE_BYTES or fixing server connectivity."
+ )
+
+ # Trigger immediate flush if threshold reached and flush not in progress
+ if threshold_reached and not flush_in_progress:
+ logger.info(f"Buffer threshold reached ({buffer_size} spans, {self.buffer_size_bytes} bytes), triggering immediate flush")
+ self._trigger_immediate_flush()

  if duplicates_count > 0:
- logger.debug(
- f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
+ logger.debug(f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
  f"Total buffered: {buffer_size}"
  )
  else:
- logger.debug(
- f"export() added {len(spans)} span(s) to buffer. "
+ logger.debug(f"export() added {len(spans)} span(s) to buffer. "
  f"Total buffered: {buffer_size}"
  )
 
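Note: the admission policy above drops spans only while another flush already holds flush_lock; otherwise it accepts the overflow and triggers an immediate flush afterwards. Reduced to a standalone sketch (names are illustrative, not the package API):

    def admit(buffer, span, max_spans, flush_in_progress):
        """Bounded-buffer admission: drop on overflow only when a flush is
        already draining; otherwise accept and let the caller trigger a flush."""
        if len(buffer) >= max_spans and flush_in_progress:
            return False  # dropped; the caller counts and logs it
        buffer.append(span)
        return True
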
@@ -233,18 +279,35 @@ class AIQASpanExporter(SpanExporter):
  nanos = int(nanoseconds % 1_000_000_000)
  return (seconds, nanos)

+ def _get_span_size(self, span_key: tuple, serialized: Dict[str, Any]) -> int:
+ """
+ Get span size from cache or calculate and cache it.
+ Thread-safe when called within buffer_lock.
+ Limits cache size to prevent unbounded memory growth.
+ """
+ if span_key in self._span_size_cache:
+ return self._span_size_cache[span_key]
+ span_json = json.dumps(serialized)
+ span_size = len(span_json.encode('utf-8'))
+ # Only cache if we have valid keys and cache isn't too large
+ if span_key[0] and span_key[1] and len(self._span_size_cache) < self._max_cache_size:
+ self._span_size_cache[span_key] = span_size
+ return span_size
+
+ def _check_thresholds_reached(self) -> bool:
+ """Check if buffer thresholds are reached. Must be called within buffer_lock."""
+ if len(self.buffer) >= self.max_buffer_spans:
+ return True
+ if self.max_buffer_size_bytes is not None and self.buffer_size_bytes >= self.max_buffer_size_bytes:
+ return True
+ return False
+
  def _build_request_headers(self) -> Dict[str, str]:
  """Build HTTP headers for span requests."""
- headers = {"Content-Type": "application/json"}
- if self.api_key:
- headers["Authorization"] = f"ApiKey {self.api_key}"
- return headers
+ return build_headers(self._api_key)

  def _get_span_url(self) -> str:
- """Get the URL for sending spans."""
- if not self.server_url:
- raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
- return f"{self.server_url}/span"
+ return f"{self._server_url}/span"
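
Note: build_headers moved into the new aiqa/http_utils.py, which this diff adds but does not display. Judging by the inline code it replaces, it presumably produces something like the following sketch (not the actual module):

    def build_headers(api_key):
        # Same shape as the removed _build_request_headers body
        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["Authorization"] = f"ApiKey {api_key}"
        return headers

Note also that _get_span_url no longer raises ValueError when the server URL is missing; callers such as flush() now check self._server_url themselves before sending.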

  def _is_interpreter_shutdown_error(self, error: Exception) -> bool:
  """Check if error is due to interpreter shutdown."""
@@ -263,36 +326,49 @@ class AIQASpanExporter(SpanExporter):
  are added between extraction and clearing.
  Note: Does NOT clear buffer_span_keys - that should be done after successful send
  to avoid unnecessary clearing/rebuilding on failures.
+ Also resets buffer_size_bytes to 0.
  """
  with self.buffer_lock:
  spans = self.buffer[:]
  self.buffer.clear()
+ self.buffer_size_bytes = 0
  return spans

  def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
  """
- Remove span keys from tracking set (thread-safe). Called after successful send.
+ Remove span keys from tracking set and size cache (thread-safe). Called after successful send.
  """
  with self.buffer_lock:
  for span in spans:
  span_key = (span["traceId"], span["spanId"])
  self.buffer_span_keys.discard(span_key)
+ # Also remove from size cache to free memory
+ self._span_size_cache.pop(span_key, None)

  def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
  """
  Prepend spans back to buffer (thread-safe). Used to restore spans
- if sending fails. Rebuilds the span keys tracking set.
+ if sending fails. Rebuilds the span keys tracking set and buffer size.
+ Uses cached sizes when available to avoid re-serialization.
  """
  with self.buffer_lock:
  self.buffer[:0] = spans
  # Rebuild span keys set from current buffer contents
  self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
+ # Recalculate buffer size using cache when available
+ total_size = 0
+ for span in self.buffer:
+ span_key = (span.get("traceId"), span.get("spanId"))
+ total_size += self._get_span_size(span_key, span)
+ self.buffer_size_bytes = total_size

  def _clear_buffer(self) -> None:
  """Clear the buffer (thread-safe)."""
  with self.buffer_lock:
  self.buffer.clear()
  self.buffer_span_keys.clear()
+ self.buffer_size_bytes = 0
+ self._span_size_cache.clear()
 
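Note: these helpers compose into an extract-send-restore cycle; a simplified sketch of the intended flow (illustrative, with error handling elided):

    spans = exporter._extract_and_remove_spans_from_buffer()  # buffer and byte counter reset atomically
    try:
        exporter._send_spans_sync(spans)
        exporter._remove_span_keys_from_tracking(spans)  # frees dedup keys and cached sizes
    except Exception:
        exporter._prepend_spans_to_buffer(spans)  # restores order, keys, and byte count
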
  def _split_into_batches(self, spans: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
  """
@@ -308,9 +384,9 @@ class AIQASpanExporter(SpanExporter):
  current_batch_size = 0

  for span in spans:
- # Estimate size of this span when serialized
- span_json = json.dumps(span)
- span_size = len(span_json.encode('utf-8'))
+ # Get size from cache if available, otherwise calculate it
+ span_key = (span.get("traceId"), span.get("spanId"))
+ span_size = self._get_span_size(span_key, span)

  # Check if this single span exceeds the limit
  if span_size > self.max_batch_size_bytes:
@@ -323,8 +399,7 @@ class AIQASpanExporter(SpanExporter):
  # Log warning about oversized span
  span_name = span.get('name', 'unknown')
  span_trace_id = span.get('traceId', 'unknown')
- logger.warning(
- f"Span '{span_name}' (traceId={span_trace_id}) exceeds max_batch_size_bytes "
+ logger.warning(f"Span \'{span_name}' (traceId={span_trace_id}) exceeds max_batch_size_bytes "
  f"({span_size} bytes > {self.max_batch_size_bytes} bytes). "
  f"Will attempt to send it anyway - may fail if server/nginx limit is exceeded."
  )
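
Note: only fragments of _split_into_batches appear in this diff; the surrounding lines indicate a greedy size-based packer. A sketch of that general pattern (not the package's exact code), with size_of standing in for the cached _get_span_size lookup:

    def split_into_batches(spans, max_bytes, size_of):
        batches, current, current_size = [], [], 0
        for span in spans:
            size = size_of(span)
            # Close the current batch when this span would overflow it;
            # an oversized span then travels alone, as the warning above describes
            if current and current_size + size > max_bytes:
                batches.append(current)
                current, current_size = [], 0
            current.append(span)
            current_size += size
        if current:
            batches.append(current)
        return batches
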
@@ -354,22 +429,21 @@ class AIQASpanExporter(SpanExporter):

  Lock ordering: flush_lock -> buffer_lock (must be consistent to avoid deadlocks)
  """
- logger.debug("flush() called - attempting to acquire flush lock")
+ logger.debug(f"flush: called - attempting to acquire flush lock")
  with self.flush_lock:
- logger.debug("flush() acquired flush lock")
+ logger.debug(f"flush() acquired flush lock")
  # Atomically extract and remove spans to prevent race conditions
  # where export() adds spans between extraction and clearing
  spans_to_flush = self._extract_and_remove_spans_from_buffer()
- logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")
+ logger.debug(f"flush: extracted {len(spans_to_flush)} span(s) from buffer")

  if not spans_to_flush:
- logger.debug("flush() completed: no spans to flush")
+ logger.debug(f"flush() completed: no spans to flush")
  return

  # Skip sending if server URL is not configured
- if not self.server_url:
- logger.warning(
- f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
+ if not self._server_url:
+ logger.warning(f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
  )
  # Spans already removed from buffer, clear their keys to free memory
  self._remove_span_keys_from_tracking(spans_to_flush)
@@ -377,7 +451,7 @@ class AIQASpanExporter(SpanExporter):

  # Release flush_lock before I/O to avoid blocking other flush attempts
  # Spans are already extracted, so concurrent exports won't interfere
- logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
+ logger.info(f"flush: sending {len(spans_to_flush)} span(s) to server")
  try:
  await self._send_spans(spans_to_flush)
  logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
@@ -387,7 +461,7 @@ class AIQASpanExporter(SpanExporter):
  except RuntimeError as error:
  if self._is_interpreter_shutdown_error(error):
  if self.shutdown_requested:
- logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
+ logger.debug(f"flush: skipped due to interpreter shutdown: {error}")
  else:
  logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
  # Put spans back for retry with sync send during shutdown
@@ -423,10 +497,32 @@ class AIQASpanExporter(SpanExporter):
  # Don't raise - allow spans to be buffered even if auto-flush fails
  # They can still be flushed manually or on shutdown

+ def _trigger_immediate_flush(self) -> None:
+ """
+ Trigger an immediate flush in a background thread.
+ This is called when buffer thresholds are reached and no flush is in progress.
+ """
+ def flush_in_thread():
+ """Run flush in a new thread with its own event loop."""
+ try:
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ try:
+ loop.run_until_complete(self.flush())
+ finally:
+ if not loop.is_closed():
+ loop.close()
+ except Exception as e:
+ logger.error(f"Error in immediate flush thread: {e}", exc_info=True)
+
+ # Start flush in background thread (daemon so it doesn't block shutdown)
+ flush_thread = threading.Thread(target=flush_in_thread, daemon=True, name="AIQA-ImmediateFlush")
+ flush_thread.start()
+
  def _flush_worker(self) -> None:
  """Worker function for auto-flush thread. Runs in a separate thread with its own event loop."""
  import asyncio
- logger.debug("Auto-flush worker thread started")
+ logger.debug(f"Auto-flush worker thread started")

  # Wait for startup delay before beginning flush operations
  # This gives the container/application time to stabilize, which helps avoid startup issues (seen with AWS ECS, Dec 2025).
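
Note: flush_in_thread builds and tears down an event loop by hand; on Python 3.7+ its body is roughly equivalent to the simpler form below (a sketch, not a suggested patch), since asyncio.run() creates a fresh loop, runs the coroutine, and closes the loop:

    def flush_in_thread():
        try:
            asyncio.run(self.flush())  # new loop per call, closed automatically
        except Exception as e:
            logger.error(f"Error in immediate flush thread: {e}", exc_info=True)
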
@@ -441,10 +537,10 @@ class AIQASpanExporter(SpanExporter):
  remaining_delay -= sleep_time

  if self.shutdown_requested:
- logger.debug("Auto-flush startup delay interrupted by shutdown")
+ logger.debug(f"Auto-flush startup delay interrupted by shutdown")
  return

- logger.info("Auto-flush startup delay complete, beginning flush operations")
+ logger.info(f"Auto-flush startup delay complete, beginning flush operations")

  # Create event loop in this thread (isolated from main thread's event loop)
  # This prevents interference with the main application's event loop
@@ -475,24 +571,23 @@ class AIQASpanExporter(SpanExporter):
  logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")
  # Don't do final flush here - shutdown() will handle it with synchronous send
  # This avoids event loop shutdown issues
- logger.debug("Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
+ logger.debug(f"Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
  finally:
  # Always close the event loop, even if an exception occurs
  try:
  if not loop.is_closed():
  loop.close()
- logger.debug("Auto-flush worker thread event loop closed")
+ logger.debug(f"Auto-flush worker thread event loop closed")
  except Exception:
  pass # Ignore errors during cleanup

  def _start_auto_flush(self) -> None:
  """Start the auto-flush timer with startup delay."""
  if self.shutdown_requested:
- logger.warning("_start_auto_flush() called but shutdown already requested")
+ logger.warning(f"_start_auto_flush() called but shutdown already requested")
  return

- logger.info(
- f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s, "
+ logger.info(f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s, "
  f"startup delay {self.startup_delay_seconds}s"
  )
@@ -508,15 +603,15 @@ class AIQASpanExporter(SpanExporter):
  # Split into batches if needed
  batches = self._split_into_batches(spans)
  if len(batches) > 1:
- logger.info(f"_send_spans() splitting {len(spans)} spans into {len(batches)} batches")
+ logger.info(f"_send_spans: splitting {len(spans)} spans into {len(batches)} batches")

  url = self._get_span_url()
  headers = self._build_request_headers()

- if self.api_key:
- logger.debug("_send_spans() using API key authentication")
- else:
- logger.debug("_send_spans() no API key provided")
+ if not self._api_key: # This should not happen
+ logger.error(f"_send_spans: fail - no API key provided. {len(spans)} spans lost.")
+ # Spans were already removed from buffer before calling this method. They will now get forgotten
+ return

  # Use timeout to prevent hanging on unreachable servers
  timeout = aiohttp.ClientTimeout(total=30.0, connect=10.0)
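
Note: this is a behavior change worth flagging: 0.4.3 sent spans without an Authorization header when no API key was configured, while 0.4.7 logs an error and discards them. A defensive startup check along these lines may help (sketch; AIQA_API_KEY is the env var the removed api_key property read):

    import os
    if not os.getenv("AIQA_API_KEY"):
        raise RuntimeError("AIQA_API_KEY is not set; AIQASpanExporter would drop all spans")
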
@@ -524,41 +619,41 @@ class AIQASpanExporter(SpanExporter):
  async with aiohttp.ClientSession(timeout=timeout) as session:
  for batch_idx, batch in enumerate(batches):
  try:
- logger.debug(f"_send_spans() sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
+ logger.debug(f"_send_spans: sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
  # Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
  json_bytes = json.dumps(batch).encode('utf-8')
  data = io.BytesIO(json_bytes)

  async with session.post(url, data=data, headers=headers) as response:
- logger.debug(f"_send_spans() batch {batch_idx + 1} received response: status={response.status}")
+ logger.debug(f"_send_spans: batch {batch_idx + 1} received response: status={response.status}")
  if not response.ok:
  error_text = await response.text()
  error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status} {response.reason} - {error_text[:200]}"
- logger.error(f"_send_spans() {error_msg}")
+ logger.error(f"_send_spans: {error_msg}")
  errors.append((batch_idx + 1, error_msg))
  # Continue with other batches even if one fails
  continue
- logger.debug(f"_send_spans() batch {batch_idx + 1} successfully sent {len(batch)} spans")
+ logger.debug(f"_send_spans: batch {batch_idx + 1} successfully sent {len(batch)} spans")
  except (aiohttp.ClientError, asyncio.TimeoutError) as e:
  # Network errors and timeouts - log but don't fail completely
  error_msg = f"Network error in batch {batch_idx + 1}: {type(e).__name__}: {e}"
- logger.warning(f"_send_spans() {error_msg} - will retry on next flush")
+ logger.warning(f"_send_spans: {error_msg} - will retry on next flush")
  errors.append((batch_idx + 1, error_msg))
  # Continue with other batches
  except RuntimeError as e:
  if self._is_interpreter_shutdown_error(e):
  if self.shutdown_requested:
- logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
+ logger.debug(f"_send_spans: skipped due to interpreter shutdown: {e}")
  else:
- logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
+ logger.warning(f"_send_spans: interrupted by interpreter shutdown: {e}")
  raise
  error_msg = f"RuntimeError in batch {batch_idx + 1}: {type(e).__name__}: {e}"
- logger.error(f"_send_spans() {error_msg}")
+ logger.error(f"_send_spans: {error_msg}")
  errors.append((batch_idx + 1, error_msg))
  # Continue with other batches
  except Exception as e:
  error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
- logger.error(f"_send_spans() {error_msg}")
+ logger.error(f"_send_spans: {error_msg}")
  errors.append((batch_idx + 1, error_msg))
  # Continue with other batches

@@ -568,7 +663,7 @@ class AIQASpanExporter(SpanExporter):
  error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
  raise Exception(f"Failed to send some spans: {error_summary}")

- logger.debug(f"_send_spans() successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
+ logger.debug(f"_send_spans: successfully sent all {len(spans)} spans in {len(batches)} batch(es)")

  def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
  """Send spans to the server API (synchronous, for shutdown scenarios). Batches large payloads automatically."""
@@ -582,10 +677,9 @@ class AIQASpanExporter(SpanExporter):
  url = self._get_span_url()
  headers = self._build_request_headers()

- if self.api_key:
- logger.debug("_send_spans_sync() using API key authentication")
- else:
- logger.debug("_send_spans_sync() no API key provided")
+ if not self._api_key:
+ logger.error(f"_send_spans_sync() fail - no API key provided")
+ return

  errors = []
  for batch_idx, batch in enumerate(batches):
@@ -616,64 +710,63 @@ class AIQASpanExporter(SpanExporter):

  def shutdown(self) -> None:
  """Shutdown the exporter, flushing any remaining spans. Call before process exit."""
- logger.info("shutdown() called - initiating exporter shutdown")
+ logger.info(f"shutdown: called - initiating exporter shutdown")
  self.shutdown_requested = True

  # Check buffer state before shutdown
  with self.buffer_lock:
  buffer_size = len(self.buffer)
- logger.info(f"shutdown() buffer contains {buffer_size} span(s) before shutdown")
+ logger.info(f"shutdown: buffer contains {buffer_size} span(s) before shutdown")

  # Wait for flush thread to finish (it will do final flush)
  # Only wait if thread was actually started
  if self._auto_flush_started and self.flush_timer and self.flush_timer.is_alive():
- logger.info("shutdown() waiting for auto-flush thread to complete (timeout=10s)")
+ logger.info(f"shutdown: waiting for auto-flush thread to complete (timeout=10s)")
  self.flush_timer.join(timeout=10.0)
  if self.flush_timer.is_alive():
- logger.warning("shutdown() auto-flush thread did not complete within timeout")
+ logger.warning(f"shutdown: auto-flush thread did not complete within timeout")
  else:
- logger.info("shutdown() auto-flush thread completed")
+ logger.info(f"shutdown: auto-flush thread completed")
  else:
- logger.debug("shutdown() no active auto-flush thread to wait for")
+ logger.debug(f"shutdown: no active auto-flush thread to wait for")

  # Final flush attempt (use synchronous send to avoid event loop issues)
  with self.flush_lock:
- logger.debug("shutdown() performing final flush with synchronous send")
+ logger.debug(f"shutdown: performing final flush with synchronous send")
  # Atomically extract and remove spans to prevent race conditions
  spans_to_flush = self._extract_and_remove_spans_from_buffer()
- logger.debug(f"shutdown() extracted {len(spans_to_flush)} span(s) from buffer for final flush")
+ logger.debug(f"shutdown: extracted {len(spans_to_flush)} span(s) from buffer for final flush")

  if spans_to_flush:
- if not self.server_url:
- logger.warning(
- f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
+ if not self._server_url:
+ logger.warning(f"shutdown: skipping final flush: AIQA_SERVER_URL is not set. "
  f"{len(spans_to_flush)} span(s) will not be sent."
  )
  # Spans already removed from buffer, clear their keys to free memory
  self._remove_span_keys_from_tracking(spans_to_flush)
  else:
- logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
+ logger.info(f"shutdown: sending {len(spans_to_flush)} span(s) to server (synchronous)")
  try:
  self._send_spans_sync(spans_to_flush)
- logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
+ logger.info(f"shutdown: successfully sent {len(spans_to_flush)} span(s) to server")
  # Spans already removed from buffer during extraction
  # Clear their keys from tracking set to free memory
  self._remove_span_keys_from_tracking(spans_to_flush)
  except Exception as e:
- logger.error(f"shutdown() failed to send spans: {e}")
+ logger.error(f"shutdown: failed to send spans: {e}")
  # Spans already removed, but process is exiting anyway
- logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")
+ logger.warning(f"shutdown: {len(spans_to_flush)} span(s) were not sent due to error")
  # Keys will remain in tracking set, but process is exiting so memory will be freed
  else:
- logger.debug("shutdown() no spans to flush")
+ logger.debug(f"shutdown: no spans to flush")

  # Check buffer state after shutdown
  with self.buffer_lock:
  buffer_size = len(self.buffer)
  if buffer_size > 0:
- logger.warning(f"shutdown() buffer still contains {buffer_size} span(s) after shutdown")
+ logger.warning(f"shutdown: buffer still contains {buffer_size} span(s) after shutdown")
  else:
- logger.info(f"shutdown() buffer is empty after shutdown")
+ logger.info(f"shutdown: buffer is empty after shutdown")

- logger.info("shutdown() completed")
+ logger.info(f"shutdown: completed")
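
Note: the docstring asks callers to invoke shutdown() before process exit so the final synchronous flush can run; a minimal wiring sketch (illustrative; constructor arguments omitted, so server URL and API key come from the environment):

    import atexit
    exporter = AIQASpanExporter()
    atexit.register(exporter.shutdown)  # flushes any remaining spans synchronously at exit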