aiqa-client 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/__init__.py ADDED
@@ -0,0 +1,66 @@
1
+ """
2
+ Python client for AIQA server - OpenTelemetry tracing decorators.
3
+
4
+ IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
5
+ the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
6
+
7
+ Example:
8
+ from dotenv import load_dotenv
9
+ from aiqa import get_aiqa_client, WithTracing
10
+
11
+ # Load environment variables from .env file (if using one)
12
+ load_dotenv()
13
+
14
+ # Initialize client (must be called before using WithTracing or other functions)
15
+ get_aiqa_client()
16
+
17
+ @WithTracing
18
+ def my_function():
19
+ return "Hello, AIQA!"
20
+ """
21
+
22
+ from .tracing import (
23
+ WithTracing,
24
+ flush_tracing,
25
+ shutdown_tracing,
26
+ set_span_attribute,
27
+ set_span_name,
28
+ get_active_span,
29
+ get_provider,
30
+ get_exporter,
31
+ get_trace_id,
32
+ get_span_id,
33
+ create_span_from_trace_id,
34
+ inject_trace_context,
35
+ extract_trace_context,
36
+ set_conversation_id,
37
+ set_component_tag,
38
+ get_span,
39
+ )
40
+ from .client import get_aiqa_client
41
+ from .experiment_runner import ExperimentRunner
42
+
43
__version__ = "0.3.1"

# Names exported by ``from aiqa import *``. The order is kept exactly as
# published; entries are only grouped visually by the module they come from.
__all__ = [
    # tracing: decorator, lifecycle and span helpers
    "WithTracing", "flush_tracing", "shutdown_tracing",
    "set_span_attribute", "set_span_name", "get_active_span",
    "get_provider", "get_exporter",
    # client initialisation and experiment runner
    "get_aiqa_client", "ExperimentRunner",
    # tracing: trace/span identifiers and context propagation
    "get_trace_id", "get_span_id", "create_span_from_trace_id",
    "inject_trace_context", "extract_trace_context",
    "set_conversation_id", "set_component_tag", "get_span",
    # package metadata
    "__version__",
]
66
+
aiqa/aiqa_exporter.py ADDED
@@ -0,0 +1,481 @@
1
+ """
2
+ OpenTelemetry span exporter that sends spans to the AIQA server API.
3
+ Buffers spans and flushes them periodically or on shutdown. Thread-safe.
4
+ """
5
+
6
+ import os
7
+ import json
8
+ import logging
9
+ import threading
10
+ import time
11
+ import io
12
+ from typing import List, Dict, Any, Optional
13
+ from opentelemetry.sdk.trace import ReadableSpan
14
+ from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
15
+
16
# Module-level logger used by every method of the exporter below. It is
# registered under the fixed name "AIQA" rather than this module's __name__.
logger = logging.getLogger("AIQA")
17
+
18
+
19
class AIQASpanExporter(SpanExporter):
    """
    Export OpenTelemetry spans to the AIQA server over HTTP.

    Spans handed to export() are serialized to plain dicts and appended to an
    in-memory buffer. A daemon thread calls flush() every
    ``flush_interval_seconds``; shutdown() stops that thread and sends whatever
    is still buffered with a synchronous request.

    Two locks are used:
      * ``buffer_lock`` guards ``buffer`` and ``buffer_span_keys``.
      * ``flush_lock`` ensures only one send (flush() or shutdown()) runs at a time.

    Call shutdown() before process exit to flush remaining spans.
    """

    # Upper bound, in seconds, for a single HTTP request to the server. Used by
    # both the async and the synchronous send paths so that a stalled server
    # cannot hold flush_lock (and therefore shutdown()) for minutes.
    _REQUEST_TIMEOUT_SECONDS = 10.0

    # How long, in seconds, flush() yields to the event loop between attempts
    # to take flush_lock while another flush is in progress.
    _FLUSH_LOCK_POLL_SECONDS = 0.05

    def __init__(
        self,
        server_url: Optional[str] = None,
        api_key: Optional[str] = None,
        flush_interval_seconds: float = 5.0,
    ):
        """
        Initialize the exporter and start the auto-flush thread.

        Args:
            server_url: URL of the AIQA server (defaults to AIQA_SERVER_URL env var)
            api_key: API key for authentication (defaults to AIQA_API_KEY env var)
            flush_interval_seconds: How often to flush spans to the server
        """
        self._server_url = server_url
        self._api_key = api_key
        self.flush_interval_ms = flush_interval_seconds * 1000
        self.buffer: List[Dict[str, Any]] = []
        # (traceId, spanId) tuples of spans that are buffered or currently
        # being sent; used by export() to drop repeated exports.
        self.buffer_span_keys: set = set()
        self.buffer_lock = threading.Lock()
        self.flush_lock = threading.Lock()
        self.shutdown_requested = False
        # Set by shutdown() so the auto-flush thread wakes up immediately
        # instead of sleeping out the rest of its interval.
        self._shutdown_event = threading.Event()
        self.flush_timer: Optional[threading.Thread] = None

        logger.info(
            f"Initializing AIQASpanExporter: server_url={self.server_url or 'not set'}, "
            f"flush_interval={flush_interval_seconds}s"
        )
        self._start_auto_flush()

    @property
    def server_url(self) -> str:
        """
        Server base URL without a trailing slash.

        Uses the value passed to the constructor, falling back to the
        AIQA_SERVER_URL environment variable (read on every access). Returns
        an empty string when neither is set.
        """
        # Strip the slash from whichever source supplied the URL; otherwise an
        # explicit "https://host/" would produce "https://host//span".
        url = self._server_url or os.getenv("AIQA_SERVER_URL", "")
        return url.rstrip("/")

    @property
    def api_key(self) -> str:
        """API key from the constructor, else the AIQA_API_KEY env var, else ''."""
        return self._api_key or os.getenv("AIQA_API_KEY", "")

    def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
        """
        Buffer spans for a later flush; nothing is sent from this call.

        Spans whose (traceId, spanId) is already tracked in
        ``buffer_span_keys`` are skipped.

        Returns:
            SpanExportResult.SUCCESS in every non-raising case.
        """
        if not spans:
            logger.debug("export() called with empty spans list")
            return SpanExportResult.SUCCESS
        logger.debug(f"AIQA export() called with {len(spans)} spans")

        # Serialization touches no shared state, so do it before taking the
        # lock to keep the critical section short.
        serialized_batch = [self._serialize_span(span) for span in spans]

        new_spans = []
        duplicates_count = 0
        with self.buffer_lock:
            for serialized in serialized_batch:
                span_key = (serialized["traceId"], serialized["spanId"])
                if span_key in self.buffer_span_keys:
                    duplicates_count += 1
                    logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
                    continue
                self.buffer_span_keys.add(span_key)
                new_spans.append(serialized)

            self.buffer.extend(new_spans)
            buffer_size = len(self.buffer)

        if duplicates_count > 0:
            logger.debug(
                f"export() added {len(new_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
                f"Total buffered: {buffer_size}"
            )
        else:
            logger.debug(
                f"export() added {len(spans)} span(s) to buffer. "
                f"Total buffered: {buffer_size}"
            )

        return SpanExportResult.SUCCESS

    def _serialize_span(self, span: ReadableSpan) -> Dict[str, Any]:
        """
        Convert a ReadableSpan into a JSON-ready dict.

        Trace and span IDs are rendered as zero-padded lowercase hex
        (32 and 16 digits). Times are (seconds, nanoseconds) tuples.
        ``endTime`` and ``duration`` are None for a span with no end time.
        """
        span_context = span.get_span_context()

        # The parent may be exposed either as a context object (`parent`) or
        # as a raw integer id (`parent_span_id`); accept both.
        parent_span_id = None
        if hasattr(span, "parent") and span.parent:
            parent_span_id = format(span.parent.span_id, "016x")
        elif hasattr(span, "parent_span_id") and span.parent_span_id:
            parent_span_id = format(span.parent_span_id, "016x")

        # Kind and status code may be enums or plain ints; send the raw value.
        span_kind = span.kind
        if hasattr(span_kind, "value"):
            span_kind = span_kind.value

        status_code = span.status.status_code
        if hasattr(status_code, "value"):
            status_code = status_code.value

        return {
            "name": span.name,
            "kind": span_kind,
            "parentSpanId": parent_span_id,
            "startTime": self._time_to_tuple(span.start_time),
            "endTime": self._time_to_tuple(span.end_time) if span.end_time else None,
            "status": {
                "code": status_code,
                "message": getattr(span.status, "description", None),
            },
            "attributes": dict(span.attributes) if span.attributes else {},
            "links": [
                {
                    "context": {
                        "traceId": format(link.context.trace_id, "032x"),
                        "spanId": format(link.context.span_id, "016x"),
                    },
                    "attributes": dict(link.attributes) if link.attributes else {},
                }
                for link in (span.links or [])
            ],
            "events": [
                {
                    "name": event.name,
                    "time": self._time_to_tuple(event.timestamp),
                    "attributes": dict(event.attributes) if event.attributes else {},
                }
                for event in (span.events or [])
            ],
            "resource": {
                "attributes": dict(span.resource.attributes) if span.resource.attributes else {},
            },
            "traceId": format(span_context.trace_id, "032x"),
            "spanId": format(span_context.span_id, "016x"),
            "traceFlags": span_context.trace_flags,
            "duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
            "ended": span.end_time is not None,
            "instrumentationLibrary": {
                "name": self._get_instrumentation_name(),
                "version": self._get_instrumentation_version(),
            },
        }

    def _time_to_tuple(self, nanoseconds: int) -> tuple:
        """Split a nanosecond count into a (whole seconds, remaining nanoseconds) tuple."""
        seconds, nanos = divmod(nanoseconds, 1_000_000_000)
        return (int(seconds), int(nanos))

    def _get_instrumentation_name(self) -> str:
        """Return the tracer name constant AIQA_TRACER_NAME from the client module."""
        # Imported lazily, as in the original code (presumably to avoid an
        # import cycle with .client - confirm before hoisting).
        from .client import AIQA_TRACER_NAME
        return AIQA_TRACER_NAME

    def _get_instrumentation_version(self) -> Optional[str]:
        """Return the package ``__version__``, or None if it cannot be imported."""
        try:
            from . import __version__
            return __version__
        except (ImportError, AttributeError):
            return None

    def _build_request_headers(self) -> Dict[str, str]:
        """Build the HTTP headers for a span request; adds Authorization only when an API key is set."""
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"ApiKey {self.api_key}"
        return headers

    def _get_span_url(self) -> str:
        """
        Return the span ingestion endpoint, ``<server_url>/span``.

        Raises:
            ValueError: if no server URL is configured.
        """
        if not self.server_url:
            raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
        return f"{self.server_url}/span"

    def _is_interpreter_shutdown_error(self, error: Exception) -> bool:
        """Return True if the error message matches one of the known interpreter-shutdown texts."""
        error_str = str(error)
        return "cannot schedule new futures after" in error_str or "interpreter shutdown" in error_str

    def _extract_spans_from_buffer(self) -> List[Dict[str, Any]]:
        """Return a shallow copy of the buffer without modifying it (thread-safe)."""
        with self.buffer_lock:
            return self.buffer[:]

    def _extract_and_remove_spans_from_buffer(self) -> List[Dict[str, Any]]:
        """
        Atomically take every span out of the buffer (thread-safe).

        Copy and clear happen under one lock acquisition, so a concurrent
        export() cannot slip a span in between the two steps and have it lost.
        ``buffer_span_keys`` is deliberately left untouched: keys are dropped
        only after a successful send, so a failed send can put the spans back
        without a duplicate having been accepted in the meantime.
        """
        with self.buffer_lock:
            spans = self.buffer[:]
            self.buffer.clear()
            return spans

    def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
        """Forget the (traceId, spanId) keys of the given spans (thread-safe)."""
        with self.buffer_lock:
            for span in spans:
                self.buffer_span_keys.discard((span["traceId"], span["spanId"]))

    def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
        """
        Put spans back at the front of the buffer after a failed send (thread-safe).

        The key set is rebuilt from the resulting buffer contents.
        """
        with self.buffer_lock:
            self.buffer[:0] = spans
            self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}

    def _clear_buffer(self) -> None:
        """Drop all buffered spans and their tracking keys (thread-safe)."""
        with self.buffer_lock:
            self.buffer.clear()
            self.buffer_span_keys.clear()

    async def flush(self) -> None:
        """
        Send all currently buffered spans to the server.

        Only one flush runs at a time. On failure the spans are put back at
        the front of the buffer so a later flush can retry them.

        Raises:
            RuntimeError: re-raised from the send (including interpreter-shutdown errors).
            Exception: any other send error, re-raised only once shutdown has
                been requested; before that it is logged and swallowed.
        """
        import asyncio

        logger.debug("flush() called - attempting to acquire flush lock")
        # flush_lock is a thread lock that stays held across the awaited HTTP
        # request. Blocking on it here would freeze the calling event loop, and
        # would deadlock outright if the holder is another coroutine on this
        # same loop. Poll without blocking and yield between attempts instead.
        while not self.flush_lock.acquire(blocking=False):
            await asyncio.sleep(self._FLUSH_LOCK_POLL_SECONDS)
        try:
            logger.debug("flush() acquired flush lock")
            await self._flush_locked()
        finally:
            self.flush_lock.release()

    async def _flush_locked(self) -> None:
        """Body of flush(); the caller must hold ``flush_lock``."""
        # Take the spans out atomically so spans exported during the send stay
        # in the buffer for the next cycle.
        spans_to_flush = self._extract_and_remove_spans_from_buffer()
        logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")

        if not spans_to_flush:
            logger.debug("flush() completed: no spans to flush")
            return

        if not self.server_url:
            logger.warning(
                f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
            )
            # The spans are dropped, so their keys must not linger either.
            self._remove_span_keys_from_tracking(spans_to_flush)
            return

        logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
        try:
            await self._send_spans(spans_to_flush)
        except RuntimeError as error:
            if self._is_interpreter_shutdown_error(error):
                if self.shutdown_requested:
                    logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
                else:
                    logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
            else:
                logger.error(f"Error flushing spans to server: {error}")
            # Restore exactly once so shutdown()'s synchronous send can retry.
            self._prepend_spans_to_buffer(spans_to_flush)
            raise
        except Exception as error:
            logger.error(f"Error flushing spans to server: {error}")
            self._prepend_spans_to_buffer(spans_to_flush)
            if self.shutdown_requested:
                raise
        else:
            logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
            # Sent and already out of the buffer: release the keys.
            self._remove_span_keys_from_tracking(spans_to_flush)

    def _start_auto_flush(self) -> None:
        """
        Start the daemon thread that calls flush() once per flush interval.

        Does nothing (beyond a warning) if shutdown was already requested.
        The thread is stored in ``self.flush_timer``.
        """
        if self.shutdown_requested:
            logger.warning("_start_auto_flush() called but shutdown already requested")
            return

        logger.info(f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s")

        def flush_worker():
            import asyncio
            logger.debug("Auto-flush worker thread started")
            # flush() is a coroutine, so the thread drives it on its own loop.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

            cycle_count = 0
            try:
                while not self.shutdown_requested:
                    cycle_count += 1
                    logger.debug(f"Auto-flush cycle #{cycle_count} starting")
                    try:
                        loop.run_until_complete(self.flush())
                        logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
                    except Exception as e:
                        logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
                        logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
                    # Wait on the event rather than time.sleep() so shutdown()
                    # can end the pause immediately.
                    self._shutdown_event.wait(self.flush_interval_ms / 1000.0)

                logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")

                # No final flush here: shutdown() does it with a synchronous
                # send, which avoids event-loop problems at interpreter exit.
                logger.debug("Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
            finally:
                try:
                    if not loop.is_closed():
                        loop.close()
                    logger.debug("Auto-flush worker thread event loop closed")
                except Exception as close_error:
                    # Best-effort cleanup; never let it mask the real exit reason.
                    logger.debug(f"Auto-flush worker thread event loop close failed: {close_error}")

        flush_thread = threading.Thread(target=flush_worker, daemon=True, name="AIQA-AutoFlush")
        flush_thread.start()
        self.flush_timer = flush_thread
        logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")

    async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
        """
        POST the spans as one JSON array to the span endpoint (async, via aiohttp).

        Raises:
            ValueError: if no server URL is configured.
            Exception: if the server answers with a non-OK status, or on any
                transport error or timeout raised by aiohttp.
        """
        import aiohttp

        url = self._get_span_url()
        headers = self._build_request_headers()
        logger.debug(f"_send_spans() sending {len(spans)} spans to {url}")
        if self.api_key:
            logger.debug("_send_spans() using API key authentication")
        else:
            logger.debug("_send_spans() no API key provided")

        # Bound the whole request. Without this aiohttp's much longer default
        # applies while flush_lock is held, which would stall shutdown().
        timeout = aiohttp.ClientTimeout(total=self._REQUEST_TIMEOUT_SECONDS)

        try:
            # Pre-serialize to bytes and hand aiohttp a file-like payload.
            json_bytes = json.dumps(spans).encode('utf-8')
            data = io.BytesIO(json_bytes)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                logger.debug(f"_send_spans() POST request starting to {url}")
                async with session.post(url, data=data, headers=headers) as response:
                    logger.debug(f"_send_spans() received response: status={response.status}")
                    if not response.ok:
                        error_text = await response.text()
                        logger.error(
                            f"_send_spans() failed: status={response.status}, "
                            f"reason={response.reason}, error={error_text[:200]}"
                        )
                        raise Exception(
                            f"Failed to send spans: {response.status} {response.reason} - {error_text}"
                        )
                    logger.debug(f"_send_spans() successfully sent {len(spans)} spans")
        except RuntimeError as e:
            if self._is_interpreter_shutdown_error(e):
                if self.shutdown_requested:
                    logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
                else:
                    logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
                raise
            logger.error(f"_send_spans() RuntimeError: {type(e).__name__}: {e}")
            raise
        except Exception as e:
            logger.error(f"_send_spans() exception: {type(e).__name__}: {e}")
            raise

    def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
        """
        POST the spans as one JSON array to the span endpoint (blocking, via requests).

        Used by shutdown(), where no event loop is needed.

        Raises:
            ValueError: if no server URL is configured.
            Exception: if the server answers with a non-OK status, or on any
                transport error or timeout raised by requests.
        """
        import requests

        url = self._get_span_url()
        headers = self._build_request_headers()
        logger.debug(f"_send_spans_sync() sending {len(spans)} spans to {url}")
        if self.api_key:
            logger.debug("_send_spans_sync() using API key authentication")
        else:
            logger.debug("_send_spans_sync() no API key provided")

        try:
            response = requests.post(
                url, json=spans, headers=headers, timeout=self._REQUEST_TIMEOUT_SECONDS
            )
            logger.debug(f"_send_spans_sync() received response: status={response.status_code}")
            if not response.ok:
                error_text = response.text[:200] if response.text else ""
                logger.error(
                    f"_send_spans_sync() failed: status={response.status_code}, "
                    f"reason={response.reason}, error={error_text}"
                )
                raise Exception(
                    f"Failed to send spans: {response.status_code} {response.reason} - {error_text}"
                )
            logger.debug(f"_send_spans_sync() successfully sent {len(spans)} spans")
        except Exception as e:
            logger.error(f"_send_spans_sync() exception: {type(e).__name__}: {e}")
            raise

    def shutdown(self) -> None:
        """
        Stop the auto-flush thread and send any remaining spans synchronously.

        Call before process exit. Send failures are logged, not raised; the
        affected spans are dropped.
        """
        logger.info("shutdown() called - initiating exporter shutdown")
        self.shutdown_requested = True
        # Wake the auto-flush thread if it is pausing between cycles.
        self._shutdown_event.set()

        with self.buffer_lock:
            buffer_size = len(self.buffer)
            logger.info(f"shutdown() buffer contains {buffer_size} span(s) before shutdown")

        # Wait for the auto-flush thread to stop. It does NOT do a final
        # flush; that happens below with a synchronous send.
        if self.flush_timer and self.flush_timer.is_alive():
            logger.info("shutdown() waiting for auto-flush thread to complete (timeout=10s)")
            self.flush_timer.join(timeout=10.0)
            if self.flush_timer.is_alive():
                logger.warning("shutdown() auto-flush thread did not complete within timeout")
            else:
                logger.info("shutdown() auto-flush thread completed")
        else:
            logger.debug("shutdown() no active auto-flush thread to wait for")

        # Final flush with a synchronous send to avoid event loop issues.
        with self.flush_lock:
            logger.debug("shutdown() performing final flush with synchronous send")
            spans_to_flush = self._extract_and_remove_spans_from_buffer()
            logger.debug(f"shutdown() extracted {len(spans_to_flush)} span(s) from buffer for final flush")

            if not spans_to_flush:
                logger.debug("shutdown() no spans to flush")
            elif not self.server_url:
                logger.warning(
                    f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
                    f"{len(spans_to_flush)} span(s) will not be sent."
                )
                self._remove_span_keys_from_tracking(spans_to_flush)
            else:
                logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
                try:
                    self._send_spans_sync(spans_to_flush)
                    logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
                    self._remove_span_keys_from_tracking(spans_to_flush)
                except Exception as e:
                    # The spans are not re-buffered: the process is exiting,
                    # so there will be no later flush to retry them.
                    logger.error(f"shutdown() failed to send spans: {e}")
                    logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")

        with self.buffer_lock:
            buffer_size = len(self.buffer)
            if buffer_size > 0:
                logger.warning(f"shutdown() buffer still contains {buffer_size} span(s) after shutdown")
            else:
                logger.info("shutdown() buffer is empty after shutdown")

        logger.info("shutdown() completed")
481
+