aiqa-client 0.2.1__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff shows the changes between two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
aiqa/__init__.py CHANGED
@@ -1,5 +1,22 @@
 """
 Python client for AIQA server - OpenTelemetry tracing decorators.
+
+IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
+the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
+
+Example:
+    from dotenv import load_dotenv
+    from aiqa import get_aiqa_client, WithTracing
+
+    # Load environment variables from .env file (if using one)
+    load_dotenv()
+
+    # Initialize client (must be called before using WithTracing or other functions)
+    get_aiqa_client()
+
+    @WithTracing
+    def my_function():
+        return "Hello, AIQA!"
 """
 
 from .tracing import (
@@ -11,10 +28,19 @@ from .tracing import (
     get_active_span,
     get_provider,
     get_exporter,
+    get_trace_id,
+    get_span_id,
+    create_span_from_trace_id,
+    inject_trace_context,
+    extract_trace_context,
+    set_conversation_id,
+    set_component_tag,
+    get_span,
 )
-from .client import get_client
+from .client import get_aiqa_client
+from .experiment_runner import ExperimentRunner
 
-__version__ = "0.2.1"
+__version__ = "0.3.4"
 
 __all__ = [
     "WithTracing",
@@ -25,7 +51,16 @@ __all__ = [
     "get_active_span",
     "get_provider",
     "get_exporter",
-    "get_client",
+    "get_aiqa_client",
+    "ExperimentRunner",
+    "get_trace_id",
+    "get_span_id",
+    "create_span_from_trace_id",
+    "inject_trace_context",
+    "extract_trace_context",
+    "set_conversation_id",
+    "set_component_tag",
+    "get_span",
     "__version__",
 ]
 
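Note: the headline change in __init__.py is the rename of get_client to get_aiqa_client, plus a set of new trace-context helpers (get_trace_id, inject_trace_context, set_conversation_id, ...) and the ExperimentRunner export. A minimal migration sketch, based on the docstring example above; the zero-argument get_trace_id() call is an assumption from the export name alone:

    from aiqa import get_aiqa_client, WithTracing, get_trace_id

    get_aiqa_client()  # 0.2.x code called get_client(); initialize before any traced code runs

    @WithTracing
    def my_function():
        return "Hello, AIQA!"

    my_function()
    trace_id = get_trace_id()  # assumed signature - check the package docs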
aiqa/aiqa_exporter.py CHANGED
@@ -8,11 +8,12 @@ import json
 import logging
 import threading
 import time
+import io
 from typing import List, Dict, Any, Optional
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("AIQA")
 
 
 class AIQASpanExporter(SpanExporter):
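Note: because the logger is now registered under the fixed name "AIQA" rather than the module path, applications can tune the exporter's verbosity in one place with the standard logging API:

    import logging

    # Quiet the exporter's debug/info chatter; warnings and errors still surface.
    logging.getLogger("AIQA").setLevel(logging.WARNING)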
@@ -26,6 +27,7 @@ class AIQASpanExporter(SpanExporter):
        server_url: Optional[str] = None,
        api_key: Optional[str] = None,
        flush_interval_seconds: float = 5.0,
+       max_batch_size_bytes: int = 5 * 1024 * 1024,  # 5MB default
    ):
        """
        Initialize the AIQA span exporter.
@@ -34,11 +36,14 @@ class AIQASpanExporter(SpanExporter):
            server_url: URL of the AIQA server (defaults to AIQA_SERVER_URL env var)
            api_key: API key for authentication (defaults to AIQA_API_KEY env var)
            flush_interval_seconds: How often to flush spans to the server
+           max_batch_size_bytes: Maximum size of a single batch in bytes (default: 5 MB)
        """
        self._server_url = server_url
        self._api_key = api_key
        self.flush_interval_ms = flush_interval_seconds * 1000
+       self.max_batch_size_bytes = max_batch_size_bytes
        self.buffer: List[Dict[str, Any]] = []
+       self.buffer_span_keys: set = set()  # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
        self.buffer_lock = threading.Lock()
        self.flush_lock = threading.Lock()
        self.shutdown_requested = False
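Note: a construction sketch using the new 0.3.x parameter; the URL is a placeholder and the other values mirror the defaults shown above:

    exporter = AIQASpanExporter(
        server_url="https://aiqa.example.com",  # hypothetical; defaults to AIQA_SERVER_URL
        api_key=None,                           # defaults to AIQA_API_KEY
        flush_interval_seconds=5.0,
        max_batch_size_bytes=2 * 1024 * 1024,   # e.g. stay under a 2 MB proxy body limit
    )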
@@ -61,21 +66,39 @@ class AIQASpanExporter(SpanExporter):
    def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
        """
        Export spans to the AIQA server. Adds spans to buffer for async flushing.
+       Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
        """
        if not spans:
            logger.debug("export() called with empty spans list")
            return SpanExportResult.SUCCESS
        logger.debug(f"AIQA export() called with {len(spans)} spans")
-       # Serialize and add to buffer
+       # Serialize and add to buffer, deduplicating by (traceId, spanId)
        with self.buffer_lock:
-           serialized_spans = [self._serialize_span(span) for span in spans]
+           serialized_spans = []
+           duplicates_count = 0
+           for span in spans:
+               serialized = self._serialize_span(span)
+               span_key = (serialized["traceId"], serialized["spanId"])
+               if span_key not in self.buffer_span_keys:
+                   serialized_spans.append(serialized)
+                   self.buffer_span_keys.add(span_key)
+               else:
+                   duplicates_count += 1
+                   logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
+
            self.buffer.extend(serialized_spans)
            buffer_size = len(self.buffer)
 
-       logger.debug(
-           f"export() added {len(spans)} span(s) to buffer. "
-           f"Total buffered: {buffer_size}"
-       )
+       if duplicates_count > 0:
+           logger.debug(
+               f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
+               f"Total buffered: {buffer_size}"
+           )
+       else:
+           logger.debug(
+               f"export() added {len(spans)} span(s) to buffer. "
+               f"Total buffered: {buffer_size}"
+           )
 
        return SpanExportResult.SUCCESS
 
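Note: the dedup key is the (traceId, spanId) pair, and a key is tracked only while its span sits in the buffer; _remove_span_keys_from_tracking (below) discards keys after a successful send. A standalone sketch of the same check in plain Python:

    seen = set()
    spans = [{"traceId": "t1", "spanId": "a"}, {"traceId": "t1", "spanId": "a"}]
    accepted = []
    for s in spans:
        key = (s["traceId"], s["spanId"])
        if key in seen:
            continue  # duplicate while buffered: dropped, as in export()
        seen.add(key)
        accepted.append(s)
    assert len(accepted) == 1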
@@ -138,8 +161,8 @@ class AIQASpanExporter(SpanExporter):
            "duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
            "ended": span.end_time is not None,
            "instrumentationLibrary": {
-               "name": span.instrumentation_info.name if hasattr(span, "instrumentation_info") else "",
-               "version": span.instrumentation_info.version if hasattr(span, "instrumentation_info") else None,
+               "name": self._get_instrumentation_name(),
+               "version": self._get_instrumentation_version(),
            },
        }
 
@@ -148,6 +171,19 @@ class AIQASpanExporter(SpanExporter):
        seconds = int(nanoseconds // 1_000_000_000)
        nanos = int(nanoseconds % 1_000_000_000)
        return (seconds, nanos)
+
+   def _get_instrumentation_name(self) -> str:
+       """Get instrumentation library name - always 'aiqa-tracer'."""
+       from .client import AIQA_TRACER_NAME
+       return AIQA_TRACER_NAME
+
+   def _get_instrumentation_version(self) -> Optional[str]:
+       """Get instrumentation library version from __version__."""
+       try:
+           from . import __version__
+           return __version__
+       except (ImportError, AttributeError):
+           return None
 
    def _build_request_headers(self) -> Dict[str, str]:
        """Build HTTP headers for span requests."""
@@ -177,24 +213,91 @@ class AIQASpanExporter(SpanExporter):
        Atomically extract and remove all spans from buffer (thread-safe).
        Returns the extracted spans. This prevents race conditions where spans
        are added between extraction and clearing.
+       Note: Does NOT clear buffer_span_keys - that should be done after successful send
+       to avoid unnecessary clearing/rebuilding on failures.
        """
        with self.buffer_lock:
            spans = self.buffer[:]
            self.buffer.clear()
            return spans
+
+   def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
+       """
+       Remove span keys from tracking set (thread-safe). Called after successful send.
+       """
+       with self.buffer_lock:
+           for span in spans:
+               span_key = (span["traceId"], span["spanId"])
+               self.buffer_span_keys.discard(span_key)
 
    def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
        """
        Prepend spans back to buffer (thread-safe). Used to restore spans
-       if sending fails.
+       if sending fails. Rebuilds the span keys tracking set.
        """
        with self.buffer_lock:
            self.buffer[:0] = spans
+           # Rebuild span keys set from current buffer contents
+           self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
 
    def _clear_buffer(self) -> None:
        """Clear the buffer (thread-safe)."""
        with self.buffer_lock:
            self.buffer.clear()
+           self.buffer_span_keys.clear()
+
+   def _split_into_batches(self, spans: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
+       """
+       Split spans into batches based on max_batch_size_bytes.
+       Each batch will be as large as possible without exceeding the limit.
+       If a single span exceeds the limit, it will be sent in its own batch with a warning.
+       """
+       if not spans:
+           return []
+
+       batches = []
+       current_batch = []
+       current_batch_size = 0
+
+       for span in spans:
+           # Estimate size of this span when serialized
+           span_json = json.dumps(span)
+           span_size = len(span_json.encode('utf-8'))
+
+           # Check if this single span exceeds the limit
+           if span_size > self.max_batch_size_bytes:
+               # If we have a current batch, save it first
+               if current_batch:
+                   batches.append(current_batch)
+                   current_batch = []
+                   current_batch_size = 0
+
+               # Log warning about oversized span
+               span_name = span.get('name', 'unknown')
+               span_trace_id = span.get('traceId', 'unknown')
+               logger.warning(
+                   f"Span '{span_name}' (traceId={span_trace_id}) exceeds max_batch_size_bytes "
+                   f"({span_size} bytes > {self.max_batch_size_bytes} bytes). "
+                   f"Will attempt to send it anyway - may fail if server/nginx limit is exceeded."
+               )
+               # Still create a batch with just this span - we'll try to send it
+               batches.append([span])
+               continue
+
+           # If adding this span would exceed the limit, start a new batch
+           if current_batch and current_batch_size + span_size > self.max_batch_size_bytes:
+               batches.append(current_batch)
+               current_batch = []
+               current_batch_size = 0
+
+           current_batch.append(span)
+           current_batch_size += span_size
+
+       # Add the last batch if it has any spans
+       if current_batch:
+           batches.append(current_batch)
+
+       return batches
 
    async def flush(self) -> None:
        """
@@ -218,7 +321,8 @@ class AIQASpanExporter(SpanExporter):
            logger.warning(
                f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
            )
-           # Spans already removed from buffer, nothing to clear
+           # Spans already removed from buffer, clear their keys to free memory
+           self._remove_span_keys_from_tracking(spans_to_flush)
            return
 
        logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
@@ -226,6 +330,8 @@ class AIQASpanExporter(SpanExporter):
            await self._send_spans(spans_to_flush)
            logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
            # Spans already removed from buffer during extraction
+           # Now clear their keys from tracking set to free memory
+           self._remove_span_keys_from_tracking(spans_to_flush)
        except RuntimeError as error:
            if self._is_interpreter_shutdown_error(error):
                if self.shutdown_requested:
@@ -237,12 +343,12 @@ class AIQASpanExporter(SpanExporter):
                # Put spans back for retry
                self._prepend_spans_to_buffer(spans_to_flush)
                raise
-           logger.error(f"Error flushing spans to server: {error}", exc_info=True)
+           logger.error(f"Error flushing spans to server: {error}")
            # Put spans back for retry
            self._prepend_spans_to_buffer(spans_to_flush)
            raise
        except Exception as error:
-           logger.error(f"Error flushing spans to server: {error}", exc_info=True)
+           logger.error(f"Error flushing spans to server: {error}")
            # Put spans back for retry
            self._prepend_spans_to_buffer(spans_to_flush)
            if self.shutdown_requested:
@@ -271,7 +377,7 @@ class AIQASpanExporter(SpanExporter):
                logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
                time.sleep(self.flush_interval_ms / 1000.0)
            except Exception as e:
-               logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}", exc_info=True)
+               logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
                logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
                time.sleep(self.flush_interval_ms / 1000.0)
 
@@ -295,73 +401,108 @@ class AIQASpanExporter(SpanExporter):
        logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
 
    async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
-       """Send spans to the server API (async)."""
+       """Send spans to the server API (async). Batches large payloads automatically."""
        import aiohttp
 
+       # Split into batches if needed
+       batches = self._split_into_batches(spans)
+       if len(batches) > 1:
+           logger.info(f"_send_spans() splitting {len(spans)} spans into {len(batches)} batches")
+
        url = self._get_span_url()
        headers = self._build_request_headers()
-       logger.debug(f"_send_spans() sending {len(spans)} spans to {url}")
+
        if self.api_key:
            logger.debug("_send_spans() using API key authentication")
        else:
            logger.debug("_send_spans() no API key provided")
 
-       try:
-           async with aiohttp.ClientSession() as session:
-               logger.debug(f"_send_spans() POST request starting to {url}")
-               async with session.post(url, json=spans, headers=headers) as response:
-                   logger.debug(f"_send_spans() received response: status={response.status}")
-                   if not response.ok:
-                       error_text = await response.text()
-                       logger.error(
-                           f"_send_spans() failed: status={response.status}, "
-                           f"reason={response.reason}, error={error_text[:200]}"
-                       )
-                       raise Exception(
-                           f"Failed to send spans: {response.status} {response.reason} - {error_text}"
-                       )
-                   logger.debug(f"_send_spans() successfully sent {len(spans)} spans")
-       except RuntimeError as e:
-           if self._is_interpreter_shutdown_error(e):
-               if self.shutdown_requested:
-                   logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
-               else:
-                   logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
-               raise
-           logger.error(f"_send_spans() RuntimeError: {type(e).__name__}: {e}", exc_info=True)
-           raise
-       except Exception as e:
-           logger.error(f"_send_spans() exception: {type(e).__name__}: {e}", exc_info=True)
-           raise
+       errors = []
+       async with aiohttp.ClientSession() as session:
+           for batch_idx, batch in enumerate(batches):
+               try:
+                   logger.debug(f"_send_spans() sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
+                   # Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
+                   json_bytes = json.dumps(batch).encode('utf-8')
+                   data = io.BytesIO(json_bytes)
+
+                   async with session.post(url, data=data, headers=headers) as response:
+                       logger.debug(f"_send_spans() batch {batch_idx + 1} received response: status={response.status}")
+                       if not response.ok:
+                           error_text = await response.text()
+                           error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status} {response.reason} - {error_text[:200]}"
+                           logger.error(f"_send_spans() {error_msg}")
+                           errors.append((batch_idx + 1, error_msg))
+                           # Continue with other batches even if one fails
+                           continue
+                       logger.debug(f"_send_spans() batch {batch_idx + 1} successfully sent {len(batch)} spans")
+               except RuntimeError as e:
+                   if self._is_interpreter_shutdown_error(e):
+                       if self.shutdown_requested:
+                           logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
+                       else:
+                           logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
+                       raise
+                   error_msg = f"RuntimeError in batch {batch_idx + 1}: {type(e).__name__}: {e}"
+                   logger.error(f"_send_spans() {error_msg}")
+                   errors.append((batch_idx + 1, error_msg))
+                   # Continue with other batches
+               except Exception as e:
+                   error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
+                   logger.error(f"_send_spans() {error_msg}")
+                   errors.append((batch_idx + 1, error_msg))
+                   # Continue with other batches
+
+       # If any batches failed, raise an exception with details
+       if errors:
+           error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
+           raise Exception(f"Failed to send some spans: {error_summary}")
+
+       logger.debug(f"_send_spans() successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
 
    def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
-       """Send spans to the server API (synchronous, for shutdown scenarios)."""
+       """Send spans to the server API (synchronous, for shutdown scenarios). Batches large payloads automatically."""
        import requests
 
+       # Split into batches if needed
+       batches = self._split_into_batches(spans)
+       if len(batches) > 1:
+           logger.info(f"_send_spans_sync() splitting {len(spans)} spans into {len(batches)} batches")
+
        url = self._get_span_url()
        headers = self._build_request_headers()
-       logger.debug(f"_send_spans_sync() sending {len(spans)} spans to {url}")
+
        if self.api_key:
            logger.debug("_send_spans_sync() using API key authentication")
        else:
            logger.debug("_send_spans_sync() no API key provided")
 
-       try:
-           response = requests.post(url, json=spans, headers=headers, timeout=10.0)
-           logger.debug(f"_send_spans_sync() received response: status={response.status_code}")
-           if not response.ok:
-               error_text = response.text[:200] if response.text else ""
-               logger.error(
-                   f"_send_spans_sync() failed: status={response.status_code}, "
-                   f"reason={response.reason}, error={error_text}"
-               )
-               raise Exception(
-                   f"Failed to send spans: {response.status_code} {response.reason} - {error_text}"
-               )
-           logger.debug(f"_send_spans_sync() successfully sent {len(spans)} spans")
-       except Exception as e:
-           logger.error(f"_send_spans_sync() exception: {type(e).__name__}: {e}", exc_info=True)
-           raise
+       errors = []
+       for batch_idx, batch in enumerate(batches):
+           try:
+               logger.debug(f"_send_spans_sync() sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
+               response = requests.post(url, json=batch, headers=headers, timeout=10.0)
+               logger.debug(f"_send_spans_sync() batch {batch_idx + 1} received response: status={response.status_code}")
+               if not response.ok:
+                   error_text = response.text[:200] if response.text else ""
+                   error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status_code} {response.reason} - {error_text}"
+                   logger.error(f"_send_spans_sync() {error_msg}")
+                   errors.append((batch_idx + 1, error_msg))
+                   # Continue with other batches even if one fails
+                   continue
+               logger.debug(f"_send_spans_sync() batch {batch_idx + 1} successfully sent {len(batch)} spans")
+           except Exception as e:
+               error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
+               logger.error(f"_send_spans_sync() {error_msg}")
+               errors.append((batch_idx + 1, error_msg))
+               # Continue with other batches
+
+       # If any batches failed, raise an exception with details
+       if errors:
+           error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
+           raise Exception(f"Failed to send some spans: {error_summary}")
+
+       logger.debug(f"_send_spans_sync() successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
 
    def shutdown(self) -> None:
        """Shutdown the exporter, flushing any remaining spans. Call before process exit."""
@@ -397,17 +538,21 @@ class AIQASpanExporter(SpanExporter):
                    f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
                    f"{len(spans_to_flush)} span(s) will not be sent."
                )
-               # Spans already removed from buffer
+               # Spans already removed from buffer, clear their keys to free memory
+               self._remove_span_keys_from_tracking(spans_to_flush)
            else:
                logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
                try:
                    self._send_spans_sync(spans_to_flush)
                    logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
                    # Spans already removed from buffer during extraction
+                   # Clear their keys from tracking set to free memory
+                   self._remove_span_keys_from_tracking(spans_to_flush)
                except Exception as e:
-                   logger.error(f"shutdown() failed to send spans: {e}", exc_info=True)
+                   logger.error(f"shutdown() failed to send spans: {e}")
                    # Spans already removed, but process is exiting anyway
                    logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")
+                   # Keys will remain in tracking set, but process is exiting so memory will be freed
        else:
            logger.debug("shutdown() no spans to flush")
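Note on failure semantics after this change: _send_spans and _send_spans_sync keep sending the remaining batches when one fails, then raise a single aggregate Exception, and flush() reacts by re-prepending the extracted spans (rebuilding the dedup keys) so the auto-flush thread retries them. A sketch of driving a flush by hand, assuming an exporter instance already exists:

    import asyncio

    async def drain(exporter):
        try:
            await exporter.flush()
        except Exception as e:
            # Some batch failed; the spans were re-buffered and will be retried
            # by the auto-flush thread on its next cycle.
            print(f"flush failed, will retry: {e}")

    # asyncio.run(drain(exporter))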