aiqa-client 0.2.1__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff shows the changes between two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
aiqa/__init__.py CHANGED
@@ -1,5 +1,22 @@
 """
 Python client for AIQA server - OpenTelemetry tracing decorators.
+
+IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
+the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
+
+Example:
+    from dotenv import load_dotenv
+    from aiqa import get_aiqa_client, WithTracing
+
+    # Load environment variables from .env file (if using one)
+    load_dotenv()
+
+    # Initialize client (must be called before using WithTracing or other functions)
+    get_aiqa_client()
+
+    @WithTracing
+    def my_function():
+        return "Hello, AIQA!"
 """
 
 from .tracing import (
@@ -11,10 +28,19 @@ from .tracing import (
     get_active_span,
     get_provider,
     get_exporter,
+    get_trace_id,
+    get_span_id,
+    create_span_from_trace_id,
+    inject_trace_context,
+    extract_trace_context,
+    set_conversation_id,
+    set_component_tag,
+    get_span,
 )
-from .client import get_client
+from .client import get_aiqa_client
+from .experiment_runner import ExperimentRunner
 
-__version__ = "0.2.1"
+__version__ = "0.3.4"
 
 __all__ = [
     "WithTracing",
@@ -25,7 +51,16 @@ __all__ = [
     "get_active_span",
     "get_provider",
     "get_exporter",
-    "get_client",
+    "get_aiqa_client",
+    "ExperimentRunner",
+    "get_trace_id",
+    "get_span_id",
+    "create_span_from_trace_id",
+    "inject_trace_context",
+    "extract_trace_context",
+    "set_conversation_id",
+    "set_component_tag",
+    "get_span",
     "__version__",
 ]
 
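Note: the headline change in __init__.py is the rename of get_client to get_aiqa_client, plus a set of new trace-context helpers (get_trace_id, inject_trace_context, set_conversation_id, ...) and the ExperimentRunner export. A minimal migration sketch, based on the docstring example above; the zero-argument get_trace_id() call is an assumption from the export name alone:

    from aiqa import get_aiqa_client, WithTracing, get_trace_id

    get_aiqa_client()  # 0.2.x code called get_client(); initialize before any traced code runs

    @WithTracing
    def my_function():
        return "Hello, AIQA!"

    my_function()
    trace_id = get_trace_id()  # assumed signature - check the package docs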
aiqa/aiqa_exporter.py CHANGED
@@ -8,11 +8,12 @@ import json
 import logging
 import threading
 import time
+import io
 from typing import List, Dict, Any, Optional
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("AIQA")
 
 
 class AIQASpanExporter(SpanExporter):
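Note: because the logger is now registered under the fixed name "AIQA" rather than the module path, applications can tune the exporter's verbosity in one place with the standard logging API:

    import logging

    # Quiet the exporter's debug/info chatter; warnings and errors still surface.
    logging.getLogger("AIQA").setLevel(logging.WARNING)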
@@ -26,6 +27,7 @@ class AIQASpanExporter(SpanExporter):
        server_url: Optional[str] = None,
        api_key: Optional[str] = None,
        flush_interval_seconds: float = 5.0,
+       max_batch_size_bytes: int = 5 * 1024 * 1024,  # 5MB default
    ):
        """
        Initialize the AIQA span exporter.
@@ -34,11 +36,14 @@ class AIQASpanExporter(SpanExporter):
            server_url: URL of the AIQA server (defaults to AIQA_SERVER_URL env var)
            api_key: API key for authentication (defaults to AIQA_API_KEY env var)
            flush_interval_seconds: How often to flush spans to the server
+           max_batch_size_bytes: Maximum size of a single batch in bytes (default: 5 MB)
        """
        self._server_url = server_url
        self._api_key = api_key
        self.flush_interval_ms = flush_interval_seconds * 1000
+       self.max_batch_size_bytes = max_batch_size_bytes
        self.buffer: List[Dict[str, Any]] = []
+       self.buffer_span_keys: set = set()  # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
        self.buffer_lock = threading.Lock()
        self.flush_lock = threading.Lock()
        self.shutdown_requested = False
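Note: a construction sketch using the new 0.3.x parameter; the URL is a placeholder and the other values mirror the defaults shown above:

    exporter = AIQASpanExporter(
        server_url="https://aiqa.example.com",  # hypothetical; defaults to AIQA_SERVER_URL
        api_key=None,                           # defaults to AIQA_API_KEY
        flush_interval_seconds=5.0,
        max_batch_size_bytes=2 * 1024 * 1024,   # e.g. stay under a 2 MB proxy body limit
    )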
@@ -61,21 +66,39 @@ class AIQASpanExporter(SpanExporter):
    def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
        """
        Export spans to the AIQA server. Adds spans to buffer for async flushing.
+       Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
        """
        if not spans:
            logger.debug("export() called with empty spans list")
            return SpanExportResult.SUCCESS
        logger.debug(f"AIQA export() called with {len(spans)} spans")
-       # Serialize and add to buffer
+       # Serialize and add to buffer, deduplicating by (traceId, spanId)
        with self.buffer_lock:
-           serialized_spans = [self._serialize_span(span) for span in spans]
+           serialized_spans = []
+           duplicates_count = 0
+           for span in spans:
+               serialized = self._serialize_span(span)
+               span_key = (serialized["traceId"], serialized["spanId"])
+               if span_key not in self.buffer_span_keys:
+                   serialized_spans.append(serialized)
+                   self.buffer_span_keys.add(span_key)
+               else:
+                   duplicates_count += 1
+                   logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
+
            self.buffer.extend(serialized_spans)
            buffer_size = len(self.buffer)
 
-       logger.debug(
-           f"export() added {len(spans)} span(s) to buffer. "
-           f"Total buffered: {buffer_size}"
-       )
+       if duplicates_count > 0:
+           logger.debug(
+               f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
+               f"Total buffered: {buffer_size}"
+           )
+       else:
+           logger.debug(
+               f"export() added {len(spans)} span(s) to buffer. "
+               f"Total buffered: {buffer_size}"
+           )
 
        return SpanExportResult.SUCCESS
 
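Note: the dedup key is the (traceId, spanId) pair, and a key is tracked only while its span sits in the buffer; _remove_span_keys_from_tracking (below) discards keys after a successful send. A standalone sketch of the same check in plain Python:

    seen = set()
    spans = [{"traceId": "t1", "spanId": "a"}, {"traceId": "t1", "spanId": "a"}]
    accepted = []
    for s in spans:
        key = (s["traceId"], s["spanId"])
        if key in seen:
            continue  # duplicate while buffered: dropped, as in export()
        seen.add(key)
        accepted.append(s)
    assert len(accepted) == 1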
@@ -138,8 +161,8 @@ class AIQASpanExporter(SpanExporter):
            "duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
            "ended": span.end_time is not None,
            "instrumentationLibrary": {
-               "name": span.instrumentation_info.name if hasattr(span, "instrumentation_info") else "",
-               "version": span.instrumentation_info.version if hasattr(span, "instrumentation_info") else None,
+               "name": self._get_instrumentation_name(),
+               "version": self._get_instrumentation_version(),
            },
        }
 
@@ -148,6 +171,19 @@ class AIQASpanExporter(SpanExporter):
        seconds = int(nanoseconds // 1_000_000_000)
        nanos = int(nanoseconds % 1_000_000_000)
        return (seconds, nanos)
+
+   def _get_instrumentation_name(self) -> str:
+       """Get instrumentation library name - always 'aiqa-tracer'."""
+       from .client import AIQA_TRACER_NAME
+       return AIQA_TRACER_NAME
+
+   def _get_instrumentation_version(self) -> Optional[str]:
+       """Get instrumentation library version from __version__."""
+       try:
+           from . import __version__
+           return __version__
+       except (ImportError, AttributeError):
+           return None
 
    def _build_request_headers(self) -> Dict[str, str]:
        """Build HTTP headers for span requests."""
@@ -177,24 +213,91 @@ class AIQASpanExporter(SpanExporter):
        Atomically extract and remove all spans from buffer (thread-safe).
        Returns the extracted spans. This prevents race conditions where spans
        are added between extraction and clearing.
+       Note: Does NOT clear buffer_span_keys - that should be done after successful send
+       to avoid unnecessary clearing/rebuilding on failures.
        """
        with self.buffer_lock:
            spans = self.buffer[:]
            self.buffer.clear()
            return spans
+
+   def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
+       """
+       Remove span keys from tracking set (thread-safe). Called after successful send.
+       """
+       with self.buffer_lock:
+           for span in spans:
+               span_key = (span["traceId"], span["spanId"])
+               self.buffer_span_keys.discard(span_key)
 
    def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
        """
        Prepend spans back to buffer (thread-safe). Used to restore spans
-       if sending fails.
+       if sending fails. Rebuilds the span keys tracking set.
        """
        with self.buffer_lock:
            self.buffer[:0] = spans
+           # Rebuild span keys set from current buffer contents
+           self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
 
    def _clear_buffer(self) -> None:
        """Clear the buffer (thread-safe)."""
        with self.buffer_lock:
            self.buffer.clear()
+           self.buffer_span_keys.clear()
+
+   def _split_into_batches(self, spans: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
+       """
+       Split spans into batches based on max_batch_size_bytes.
+       Each batch will be as large as possible without exceeding the limit.
+       If a single span exceeds the limit, it will be sent in its own batch with a warning.
+       """
+       if not spans:
+           return []
+
+       batches = []
+       current_batch = []
+       current_batch_size = 0
+
+       for span in spans:
+           # Estimate size of this span when serialized
+           span_json = json.dumps(span)
+           span_size = len(span_json.encode('utf-8'))
+
+           # Check if this single span exceeds the limit
+           if span_size > self.max_batch_size_bytes:
+               # If we have a current batch, save it first
+               if current_batch:
+                   batches.append(current_batch)
+                   current_batch = []
+                   current_batch_size = 0
+
+               # Log warning about oversized span
+               span_name = span.get('name', 'unknown')
+               span_trace_id = span.get('traceId', 'unknown')
+               logger.warning(
+                   f"Span '{span_name}' (traceId={span_trace_id}) exceeds max_batch_size_bytes "
+                   f"({span_size} bytes > {self.max_batch_size_bytes} bytes). "
+                   f"Will attempt to send it anyway - may fail if server/nginx limit is exceeded."
+               )
+               # Still create a batch with just this span - we'll try to send it
+               batches.append([span])
+               continue
+
+           # If adding this span would exceed the limit, start a new batch
+           if current_batch and current_batch_size + span_size > self.max_batch_size_bytes:
+               batches.append(current_batch)
+               current_batch = []
+               current_batch_size = 0
+
+           current_batch.append(span)
+           current_batch_size += span_size
+
+       # Add the last batch if it has any spans
+       if current_batch:
+           batches.append(current_batch)
+
+       return batches
 
    async def flush(self) -> None:
        """
@@ -218,7 +321,8 @@ class AIQASpanExporter(SpanExporter):
            logger.warning(
                f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
            )
-           # Spans already removed from buffer, nothing to clear
+           # Spans already removed from buffer, clear their keys to free memory
+           self._remove_span_keys_from_tracking(spans_to_flush)
            return
 
        logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
@@ -226,6 +330,8 @@ class AIQASpanExporter(SpanExporter):
            await self._send_spans(spans_to_flush)
            logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
            # Spans already removed from buffer during extraction
+           # Now clear their keys from tracking set to free memory
+           self._remove_span_keys_from_tracking(spans_to_flush)
        except RuntimeError as error:
            if self._is_interpreter_shutdown_error(error):
                if self.shutdown_requested:
@@ -237,12 +343,12 @@ class AIQASpanExporter(SpanExporter):
                # Put spans back for retry
                self._prepend_spans_to_buffer(spans_to_flush)
                raise
-           logger.error(f"Error flushing spans to server: {error}", exc_info=True)
+           logger.error(f"Error flushing spans to server: {error}")
            # Put spans back for retry
            self._prepend_spans_to_buffer(spans_to_flush)
            raise
        except Exception as error:
-           logger.error(f"Error flushing spans to server: {error}", exc_info=True)
+           logger.error(f"Error flushing spans to server: {error}")
            # Put spans back for retry
            self._prepend_spans_to_buffer(spans_to_flush)
            if self.shutdown_requested:
@@ -271,7 +377,7 @@ class AIQASpanExporter(SpanExporter):
                logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
                time.sleep(self.flush_interval_ms / 1000.0)
            except Exception as e:
-               logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}", exc_info=True)
+               logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
                logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
                time.sleep(self.flush_interval_ms / 1000.0)
 
@@ -295,73 +401,108 @@ class AIQASpanExporter(SpanExporter):
        logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
 
    async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
-       """Send spans to the server API (async)."""
+       """Send spans to the server API (async). Batches large payloads automatically."""
        import aiohttp
 
+       # Split into batches if needed
+       batches = self._split_into_batches(spans)
+       if len(batches) > 1:
+           logger.info(f"_send_spans() splitting {len(spans)} spans into {len(batches)} batches")
+
        url = self._get_span_url()
        headers = self._build_request_headers()
-       logger.debug(f"_send_spans() sending {len(spans)} spans to {url}")
+
        if self.api_key:
            logger.debug("_send_spans() using API key authentication")
        else:
            logger.debug("_send_spans() no API key provided")
 
-       try:
-           async with aiohttp.ClientSession() as session:
-               logger.debug(f"_send_spans() POST request starting to {url}")
-               async with session.post(url, json=spans, headers=headers) as response:
-                   logger.debug(f"_send_spans() received response: status={response.status}")
-                   if not response.ok:
-                       error_text = await response.text()
-                       logger.error(
-                           f"_send_spans() failed: status={response.status}, "
-                           f"reason={response.reason}, error={error_text[:200]}"
-                       )
-                       raise Exception(
-                           f"Failed to send spans: {response.status} {response.reason} - {error_text}"
-                       )
-                   logger.debug(f"_send_spans() successfully sent {len(spans)} spans")
-       except RuntimeError as e:
-           if self._is_interpreter_shutdown_error(e):
-               if self.shutdown_requested:
-                   logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
-               else:
-                   logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
-               raise
-           logger.error(f"_send_spans() RuntimeError: {type(e).__name__}: {e}", exc_info=True)
-           raise
-       except Exception as e:
-           logger.error(f"_send_spans() exception: {type(e).__name__}: {e}", exc_info=True)
-           raise
+       errors = []
+       async with aiohttp.ClientSession() as session:
+           for batch_idx, batch in enumerate(batches):
+               try:
+                   logger.debug(f"_send_spans() sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
+                   # Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
+                   json_bytes = json.dumps(batch).encode('utf-8')
+                   data = io.BytesIO(json_bytes)
+
+                   async with session.post(url, data=data, headers=headers) as response:
+                       logger.debug(f"_send_spans() batch {batch_idx + 1} received response: status={response.status}")
+                       if not response.ok:
+                           error_text = await response.text()
+                           error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status} {response.reason} - {error_text[:200]}"
+                           logger.error(f"_send_spans() {error_msg}")
+                           errors.append((batch_idx + 1, error_msg))
+                           # Continue with other batches even if one fails
+                           continue
+                       logger.debug(f"_send_spans() batch {batch_idx + 1} successfully sent {len(batch)} spans")
+               except RuntimeError as e:
+                   if self._is_interpreter_shutdown_error(e):
+                       if self.shutdown_requested:
+                           logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
+                       else:
+                           logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
+                       raise
+                   error_msg = f"RuntimeError in batch {batch_idx + 1}: {type(e).__name__}: {e}"
+                   logger.error(f"_send_spans() {error_msg}")
+                   errors.append((batch_idx + 1, error_msg))
+                   # Continue with other batches
+               except Exception as e:
+                   error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
+                   logger.error(f"_send_spans() {error_msg}")
+                   errors.append((batch_idx + 1, error_msg))
+                   # Continue with other batches
+
+       # If any batches failed, raise an exception with details
+       if errors:
+           error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
+           raise Exception(f"Failed to send some spans: {error_summary}")
+
+       logger.debug(f"_send_spans() successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
 
    def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
-       """Send spans to the server API (synchronous, for shutdown scenarios)."""
+       """Send spans to the server API (synchronous, for shutdown scenarios). Batches large payloads automatically."""
        import requests
 
+       # Split into batches if needed
+       batches = self._split_into_batches(spans)
+       if len(batches) > 1:
+           logger.info(f"_send_spans_sync() splitting {len(spans)} spans into {len(batches)} batches")
+
        url = self._get_span_url()
        headers = self._build_request_headers()
-       logger.debug(f"_send_spans_sync() sending {len(spans)} spans to {url}")
+
        if self.api_key:
            logger.debug("_send_spans_sync() using API key authentication")
        else:
            logger.debug("_send_spans_sync() no API key provided")
 
-       try:
-           response = requests.post(url, json=spans, headers=headers, timeout=10.0)
-           logger.debug(f"_send_spans_sync() received response: status={response.status_code}")
-           if not response.ok:
-               error_text = response.text[:200] if response.text else ""
-               logger.error(
-                   f"_send_spans_sync() failed: status={response.status_code}, "
-                   f"reason={response.reason}, error={error_text}"
-               )
-               raise Exception(
-                   f"Failed to send spans: {response.status_code} {response.reason} - {error_text}"
-               )
-           logger.debug(f"_send_spans_sync() successfully sent {len(spans)} spans")
-       except Exception as e:
-           logger.error(f"_send_spans_sync() exception: {type(e).__name__}: {e}", exc_info=True)
-           raise
+       errors = []
+       for batch_idx, batch in enumerate(batches):
+           try:
+               logger.debug(f"_send_spans_sync() sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
+               response = requests.post(url, json=batch, headers=headers, timeout=10.0)
+               logger.debug(f"_send_spans_sync() batch {batch_idx + 1} received response: status={response.status_code}")
+               if not response.ok:
+                   error_text = response.text[:200] if response.text else ""
+                   error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status_code} {response.reason} - {error_text}"
+                   logger.error(f"_send_spans_sync() {error_msg}")
+                   errors.append((batch_idx + 1, error_msg))
+                   # Continue with other batches even if one fails
+                   continue
+               logger.debug(f"_send_spans_sync() batch {batch_idx + 1} successfully sent {len(batch)} spans")
+           except Exception as e:
+               error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
+               logger.error(f"_send_spans_sync() {error_msg}")
+               errors.append((batch_idx + 1, error_msg))
+               # Continue with other batches
+
+       # If any batches failed, raise an exception with details
+       if errors:
+           error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
+           raise Exception(f"Failed to send some spans: {error_summary}")
+
+       logger.debug(f"_send_spans_sync() successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
 
    def shutdown(self) -> None:
        """Shutdown the exporter, flushing any remaining spans. Call before process exit."""
@@ -397,17 +538,21 @@ class AIQASpanExporter(SpanExporter):
                    f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
                    f"{len(spans_to_flush)} span(s) will not be sent."
                )
-               # Spans already removed from buffer
+               # Spans already removed from buffer, clear their keys to free memory
+               self._remove_span_keys_from_tracking(spans_to_flush)
            else:
                logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
                try:
                    self._send_spans_sync(spans_to_flush)
                    logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
                    # Spans already removed from buffer during extraction
+                   # Clear their keys from tracking set to free memory
+                   self._remove_span_keys_from_tracking(spans_to_flush)
                except Exception as e:
-                   logger.error(f"shutdown() failed to send spans: {e}", exc_info=True)
+                   logger.error(f"shutdown() failed to send spans: {e}")
                    # Spans already removed, but process is exiting anyway
                    logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")
+                   # Keys will remain in tracking set, but process is exiting so memory will be freed
        else:
            logger.debug("shutdown() no spans to flush")
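Note on failure semantics after this change: _send_spans and _send_spans_sync keep sending the remaining batches when one fails, then raise a single aggregate Exception, and flush() reacts by re-prepending the extracted spans (rebuilding the dedup keys) so the auto-flush thread retries them. A sketch of driving a flush by hand, assuming an exporter instance already exists:

    import asyncio

    async def drain(exporter):
        try:
            await exporter.flush()
        except Exception as e:
            # Some batch failed; the spans were re-buffered and will be retried
            # by the auto-flush thread on its next cycle.
            print(f"flush failed, will retry: {e}")

    # asyncio.run(drain(exporter))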