aiqa-client 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
aiqa/__init__.py CHANGED
@@ -1,29 +1,66 @@
1
1
  """
2
2
  Python client for AIQA server - OpenTelemetry tracing decorators.
3
+
4
+ IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
5
+ the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
6
+
7
+ Example:
8
+ from dotenv import load_dotenv
9
+ from aiqa import get_aiqa_client, WithTracing
10
+
11
+ # Load environment variables from .env file (if using one)
12
+ load_dotenv()
13
+
14
+ # Initialize client (must be called before using WithTracing or other functions)
15
+ get_aiqa_client()
16
+
17
+ @WithTracing
18
+ def my_function():
19
+ return "Hello, AIQA!"
3
20
  """
4
21
 
5
22
  from .tracing import (
6
23
  WithTracing,
7
- flush_spans,
24
+ flush_tracing,
8
25
  shutdown_tracing,
9
26
  set_span_attribute,
10
27
  set_span_name,
11
28
  get_active_span,
12
- provider,
13
- exporter,
29
+ get_provider,
30
+ get_exporter,
31
+ get_trace_id,
32
+ get_span_id,
33
+ create_span_from_trace_id,
34
+ inject_trace_context,
35
+ extract_trace_context,
36
+ set_conversation_id,
37
+ set_component_tag,
38
+ get_span,
14
39
  )
40
+ from .client import get_aiqa_client
41
+ from .experiment_runner import ExperimentRunner
15
42
 
16
- __version__ = "0.1.1"
43
+ __version__ = "0.1.2"
17
44
 
18
45
  __all__ = [
19
46
  "WithTracing",
20
- "flush_spans",
47
+ "flush_tracing",
21
48
  "shutdown_tracing",
22
49
  "set_span_attribute",
23
50
  "set_span_name",
24
51
  "get_active_span",
25
- "provider",
26
- "exporter",
52
+ "get_provider",
53
+ "get_exporter",
54
+ "get_aiqa_client",
55
+ "ExperimentRunner",
56
+ "get_trace_id",
57
+ "get_span_id",
58
+ "create_span_from_trace_id",
59
+ "inject_trace_context",
60
+ "extract_trace_context",
61
+ "set_conversation_id",
62
+ "set_component_tag",
63
+ "get_span",
27
64
  "__version__",
28
65
  ]
29
66
 
aiqa/aiqa_exporter.py CHANGED
@@ -8,11 +8,12 @@ import json
8
8
  import logging
9
9
  import threading
10
10
  import time
11
+ import io
11
12
  from typing import List, Dict, Any, Optional
12
13
  from opentelemetry.sdk.trace import ReadableSpan
13
14
  from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
14
15
 
15
- logger = logging.getLogger(__name__)
16
+ logger = logging.getLogger("AIQA")
16
17
 
17
18
 
18
19
  class AIQASpanExporter(SpanExporter):
@@ -39,6 +40,7 @@ class AIQASpanExporter(SpanExporter):
39
40
  self._api_key = api_key
40
41
  self.flush_interval_ms = flush_interval_seconds * 1000
41
42
  self.buffer: List[Dict[str, Any]] = []
43
+ self.buffer_span_keys: set = set() # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
42
44
  self.buffer_lock = threading.Lock()
43
45
  self.flush_lock = threading.Lock()
44
46
  self.shutdown_requested = False
@@ -61,21 +63,39 @@ class AIQASpanExporter(SpanExporter):
61
63
  def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
62
64
  """
63
65
  Export spans to the AIQA server. Adds spans to buffer for async flushing.
66
+ Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
64
67
  """
65
68
  if not spans:
66
69
  logger.debug("export() called with empty spans list")
67
70
  return SpanExportResult.SUCCESS
68
-
69
- # Serialize and add to buffer
71
+ logger.debug(f"AIQA export() called with {len(spans)} spans")
72
+ # Serialize and add to buffer, deduplicating by (traceId, spanId)
70
73
  with self.buffer_lock:
71
- serialized_spans = [self._serialize_span(span) for span in spans]
74
+ serialized_spans = []
75
+ duplicates_count = 0
76
+ for span in spans:
77
+ serialized = self._serialize_span(span)
78
+ span_key = (serialized["traceId"], serialized["spanId"])
79
+ if span_key not in self.buffer_span_keys:
80
+ serialized_spans.append(serialized)
81
+ self.buffer_span_keys.add(span_key)
82
+ else:
83
+ duplicates_count += 1
84
+ logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
85
+
72
86
  self.buffer.extend(serialized_spans)
73
87
  buffer_size = len(self.buffer)
74
88
 
75
- logger.debug(
76
- f"export() added {len(spans)} span(s) to buffer. "
77
- f"Total buffered: {buffer_size}"
78
- )
89
+ if duplicates_count > 0:
90
+ logger.debug(
91
+ f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
92
+ f"Total buffered: {buffer_size}"
93
+ )
94
+ else:
95
+ logger.debug(
96
+ f"export() added {len(spans)} span(s) to buffer. "
97
+ f"Total buffered: {buffer_size}"
98
+ )
79
99
 
80
100
  return SpanExportResult.SUCCESS
81
101
 
@@ -138,8 +158,8 @@ class AIQASpanExporter(SpanExporter):
138
158
  "duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
139
159
  "ended": span.end_time is not None,
140
160
  "instrumentationLibrary": {
141
- "name": span.instrumentation_info.name if hasattr(span, "instrumentation_info") else "",
142
- "version": span.instrumentation_info.version if hasattr(span, "instrumentation_info") else None,
161
+ "name": self._get_instrumentation_name(),
162
+ "version": self._get_instrumentation_version(),
143
163
  },
144
164
  }
145
165
 
@@ -148,19 +168,93 @@ class AIQASpanExporter(SpanExporter):
148
168
  seconds = int(nanoseconds // 1_000_000_000)
149
169
  nanos = int(nanoseconds % 1_000_000_000)
150
170
  return (seconds, nanos)
171
+
172
+ def _get_instrumentation_name(self) -> str:
173
+ """Get instrumentation library name - always 'aiqa-tracer'."""
174
+ from .client import AIQA_TRACER_NAME
175
+ return AIQA_TRACER_NAME
176
+
177
+ def _get_instrumentation_version(self) -> Optional[str]:
178
+ """Get instrumentation library version from __version__."""
179
+ try:
180
+ from . import __version__
181
+ return __version__
182
+ except (ImportError, AttributeError):
183
+ return None
184
+
185
+ def _build_request_headers(self) -> Dict[str, str]:
186
+ """Build HTTP headers for span requests."""
187
+ headers = {"Content-Type": "application/json"}
188
+ if self.api_key:
189
+ headers["Authorization"] = f"ApiKey {self.api_key}"
190
+ return headers
191
+
192
+ def _get_span_url(self) -> str:
193
+ """Get the URL for sending spans."""
194
+ if not self.server_url:
195
+ raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
196
+ return f"{self.server_url}/span"
197
+
198
+ def _is_interpreter_shutdown_error(self, error: Exception) -> bool:
199
+ """Check if error is due to interpreter shutdown."""
200
+ error_str = str(error)
201
+ return "cannot schedule new futures after" in error_str or "interpreter shutdown" in error_str
202
+
203
+ def _extract_spans_from_buffer(self) -> List[Dict[str, Any]]:
204
+ """Extract spans from buffer (thread-safe). Returns copy of buffer."""
205
+ with self.buffer_lock:
206
+ return self.buffer[:]
207
+
208
+ def _extract_and_remove_spans_from_buffer(self) -> List[Dict[str, Any]]:
209
+ """
210
+ Atomically extract and remove all spans from buffer (thread-safe).
211
+ Returns the extracted spans. This prevents race conditions where spans
212
+ are added between extraction and clearing.
213
+ Note: Does NOT clear buffer_span_keys - that should be done after successful send
214
+ to avoid unnecessary clearing/rebuilding on failures.
215
+ """
216
+ with self.buffer_lock:
217
+ spans = self.buffer[:]
218
+ self.buffer.clear()
219
+ return spans
220
+
221
+ def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
222
+ """
223
+ Remove span keys from tracking set (thread-safe). Called after successful send.
224
+ """
225
+ with self.buffer_lock:
226
+ for span in spans:
227
+ span_key = (span["traceId"], span["spanId"])
228
+ self.buffer_span_keys.discard(span_key)
229
+
230
+ def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
231
+ """
232
+ Prepend spans back to buffer (thread-safe). Used to restore spans
233
+ if sending fails. Rebuilds the span keys tracking set.
234
+ """
235
+ with self.buffer_lock:
236
+ self.buffer[:0] = spans
237
+ # Rebuild span keys set from current buffer contents
238
+ self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
239
+
240
+ def _clear_buffer(self) -> None:
241
+ """Clear the buffer (thread-safe)."""
242
+ with self.buffer_lock:
243
+ self.buffer.clear()
244
+ self.buffer_span_keys.clear()
151
245
 
152
246
  async def flush(self) -> None:
153
247
  """
154
248
  Flush buffered spans to the server. Thread-safe: ensures only one flush operation runs at a time.
249
+ Atomically extracts spans to prevent race conditions with concurrent export() calls.
155
250
  """
156
251
  logger.debug("flush() called - attempting to acquire flush lock")
157
252
  with self.flush_lock:
158
253
  logger.debug("flush() acquired flush lock")
159
- # Get current buffer and clear it atomically
160
- with self.buffer_lock:
161
- spans_to_flush = self.buffer[:]
162
- self.buffer.clear()
163
- logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")
254
+ # Atomically extract and remove spans to prevent race conditions
255
+ # where export() adds spans between extraction and clearing
256
+ spans_to_flush = self._extract_and_remove_spans_from_buffer()
257
+ logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")
164
258
 
165
259
  if not spans_to_flush:
166
260
  logger.debug("flush() completed: no spans to flush")
@@ -171,14 +265,36 @@ class AIQASpanExporter(SpanExporter):
171
265
  logger.warning(
172
266
  f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
173
267
  )
268
+ # Spans already removed from buffer, clear their keys to free memory
269
+ self._remove_span_keys_from_tracking(spans_to_flush)
174
270
  return
175
271
 
176
272
  logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
177
273
  try:
178
274
  await self._send_spans(spans_to_flush)
179
275
  logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
276
+ # Spans already removed from buffer during extraction
277
+ # Now clear their keys from tracking set to free memory
278
+ self._remove_span_keys_from_tracking(spans_to_flush)
279
+ except RuntimeError as error:
280
+ if self._is_interpreter_shutdown_error(error):
281
+ if self.shutdown_requested:
282
+ logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
283
+ # Put spans back for retry with sync send during shutdown
284
+ self._prepend_spans_to_buffer(spans_to_flush)
285
+ else:
286
+ logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
287
+ # Put spans back for retry
288
+ self._prepend_spans_to_buffer(spans_to_flush)
289
+ raise
290
+ logger.error(f"Error flushing spans to server: {error}")
291
+ # Put spans back for retry
292
+ self._prepend_spans_to_buffer(spans_to_flush)
293
+ raise
180
294
  except Exception as error:
181
- logger.error(f"Error flushing spans to server: {error}", exc_info=True)
295
+ logger.error(f"Error flushing spans to server: {error}")
296
+ # Put spans back for retry
297
+ self._prepend_spans_to_buffer(spans_to_flush)
182
298
  if self.shutdown_requested:
183
299
  raise
184
300
 
@@ -205,23 +321,23 @@ class AIQASpanExporter(SpanExporter):
205
321
  logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
206
322
  time.sleep(self.flush_interval_ms / 1000.0)
207
323
  except Exception as e:
208
- logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}", exc_info=True)
324
+ logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
209
325
  logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
210
326
  time.sleep(self.flush_interval_ms / 1000.0)
211
327
 
212
328
  logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")
213
329
 
214
- # Final flush on shutdown
215
- if self.shutdown_requested:
216
- logger.info("Performing final flush on shutdown")
217
- try:
218
- loop.run_until_complete(self.flush())
219
- logger.info("Final flush completed successfully")
220
- except Exception as e:
221
- logger.error(f"Error in final flush: {e}", exc_info=True)
222
- finally:
330
+ # Don't do final flush here - shutdown() will handle it with synchronous send
331
+ # This avoids event loop shutdown issues
332
+ logger.debug("Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
333
+
334
+ # Close the event loop
335
+ try:
336
+ if not loop.is_closed():
223
337
  loop.close()
224
- logger.debug("Auto-flush worker thread event loop closed")
338
+ logger.debug("Auto-flush worker thread event loop closed")
339
+ except Exception:
340
+ pass # Ignore errors during cleanup
225
341
 
226
342
  flush_thread = threading.Thread(target=flush_worker, daemon=True, name="AIQA-AutoFlush")
227
343
  flush_thread.start()
@@ -229,32 +345,25 @@ class AIQASpanExporter(SpanExporter):
229
345
  logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
230
346
 
231
347
  async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
232
- """Send spans to the server API."""
233
- if not self.server_url:
234
- raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
235
-
348
+ """Send spans to the server API (async)."""
236
349
  import aiohttp
237
350
 
238
- url = f"{self.server_url}/span"
351
+ url = self._get_span_url()
352
+ headers = self._build_request_headers()
239
353
  logger.debug(f"_send_spans() sending {len(spans)} spans to {url}")
240
-
241
- headers = {
242
- "Content-Type": "application/json",
243
- }
244
354
  if self.api_key:
245
- headers["Authorization"] = f"ApiKey {self.api_key[:10]}..." # Log partial key for security
246
355
  logger.debug("_send_spans() using API key authentication")
247
356
  else:
248
357
  logger.debug("_send_spans() no API key provided")
249
358
 
250
359
  try:
360
+ # Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
361
+ json_bytes = json.dumps(spans).encode('utf-8')
362
+ data = io.BytesIO(json_bytes)
363
+
251
364
  async with aiohttp.ClientSession() as session:
252
365
  logger.debug(f"_send_spans() POST request starting to {url}")
253
- async with session.post(
254
- url,
255
- json=spans,
256
- headers=headers,
257
- ) as response:
366
+ async with session.post(url, data=data, headers=headers) as response:
258
367
  logger.debug(f"_send_spans() received response: status={response.status}")
259
368
  if not response.ok:
260
369
  error_text = await response.text()
@@ -266,8 +375,46 @@ class AIQASpanExporter(SpanExporter):
266
375
  f"Failed to send spans: {response.status} {response.reason} - {error_text}"
267
376
  )
268
377
  logger.debug(f"_send_spans() successfully sent {len(spans)} spans")
378
+ except RuntimeError as e:
379
+ if self._is_interpreter_shutdown_error(e):
380
+ if self.shutdown_requested:
381
+ logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
382
+ else:
383
+ logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
384
+ raise
385
+ logger.error(f"_send_spans() RuntimeError: {type(e).__name__}: {e}")
386
+ raise
269
387
  except Exception as e:
270
- logger.error(f"_send_spans() exception: {type(e).__name__}: {e}", exc_info=True)
388
+ logger.error(f"_send_spans() exception: {type(e).__name__}: {e}")
389
+ raise
390
+
391
+ def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
392
+ """Send spans to the server API (synchronous, for shutdown scenarios)."""
393
+ import requests
394
+
395
+ url = self._get_span_url()
396
+ headers = self._build_request_headers()
397
+ logger.debug(f"_send_spans_sync() sending {len(spans)} spans to {url}")
398
+ if self.api_key:
399
+ logger.debug("_send_spans_sync() using API key authentication")
400
+ else:
401
+ logger.debug("_send_spans_sync() no API key provided")
402
+
403
+ try:
404
+ response = requests.post(url, json=spans, headers=headers, timeout=10.0)
405
+ logger.debug(f"_send_spans_sync() received response: status={response.status_code}")
406
+ if not response.ok:
407
+ error_text = response.text[:200] if response.text else ""
408
+ logger.error(
409
+ f"_send_spans_sync() failed: status={response.status_code}, "
410
+ f"reason={response.reason}, error={error_text}"
411
+ )
412
+ raise Exception(
413
+ f"Failed to send spans: {response.status_code} {response.reason} - {error_text}"
414
+ )
415
+ logger.debug(f"_send_spans_sync() successfully sent {len(spans)} spans")
416
+ except Exception as e:
417
+ logger.error(f"_send_spans_sync() exception: {type(e).__name__}: {e}")
271
418
  raise
272
419
 
273
420
  def shutdown(self) -> None:
@@ -291,24 +438,36 @@ class AIQASpanExporter(SpanExporter):
291
438
  else:
292
439
  logger.debug("shutdown() no active auto-flush thread to wait for")
293
440
 
294
- # Final flush attempt (synchronous)
295
- import asyncio
296
- try:
297
- loop = asyncio.get_event_loop()
298
- if loop.is_running():
299
- logger.debug("shutdown() event loop is running, using ThreadPoolExecutor for final flush")
300
- # If loop is running, schedule flush
301
- import concurrent.futures
302
- with concurrent.futures.ThreadPoolExecutor() as executor:
303
- future = executor.submit(asyncio.run, self.flush())
304
- future.result(timeout=10.0)
441
+ # Final flush attempt (use synchronous send to avoid event loop issues)
442
+ with self.flush_lock:
443
+ logger.debug("shutdown() performing final flush with synchronous send")
444
+ # Atomically extract and remove spans to prevent race conditions
445
+ spans_to_flush = self._extract_and_remove_spans_from_buffer()
446
+ logger.debug(f"shutdown() extracted {len(spans_to_flush)} span(s) from buffer for final flush")
447
+
448
+ if spans_to_flush:
449
+ if not self.server_url:
450
+ logger.warning(
451
+ f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
452
+ f"{len(spans_to_flush)} span(s) will not be sent."
453
+ )
454
+ # Spans already removed from buffer, clear their keys to free memory
455
+ self._remove_span_keys_from_tracking(spans_to_flush)
456
+ else:
457
+ logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
458
+ try:
459
+ self._send_spans_sync(spans_to_flush)
460
+ logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
461
+ # Spans already removed from buffer during extraction
462
+ # Clear their keys from tracking set to free memory
463
+ self._remove_span_keys_from_tracking(spans_to_flush)
464
+ except Exception as e:
465
+ logger.error(f"shutdown() failed to send spans: {e}")
466
+ # Spans already removed, but process is exiting anyway
467
+ logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")
468
+ # Keys will remain in tracking set, but process is exiting so memory will be freed
305
469
  else:
306
- logger.debug("shutdown() event loop exists but not running, using run_until_complete")
307
- loop.run_until_complete(self.flush())
308
- except RuntimeError:
309
- # No event loop, create one
310
- logger.debug("shutdown() no event loop found, creating new one for final flush")
311
- asyncio.run(self.flush())
470
+ logger.debug("shutdown() no spans to flush")
312
471
 
313
472
  # Check buffer state after shutdown
314
473
  with self.buffer_lock:
aiqa/client.py ADDED
@@ -0,0 +1,170 @@
1
+ # aiqa/client.py
2
+ import os
3
+ import logging
4
+ from functools import lru_cache
5
+ from opentelemetry import trace
6
+ from opentelemetry.sdk.trace import TracerProvider
7
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
8
+
9
+ logger = logging.getLogger("AIQA")
10
+
11
+ # Compatibility import for TraceIdRatioBased sampler
12
+ # In older OpenTelemetry versions it was TraceIdRatioBasedSampler
13
+ # In newer versions (>=1.24.0) it's TraceIdRatioBased
14
+ TraceIdRatioBased = None
15
+ try:
16
+ from opentelemetry.sdk.trace.sampling import TraceIdRatioBased
17
+ except ImportError:
18
+ try:
19
+ from opentelemetry.sdk.trace.sampling import TraceIdRatioBasedSampler as TraceIdRatioBased
20
+ except ImportError:
21
+ logger.warning(
22
+ "Could not import TraceIdRatioBased or TraceIdRatioBasedSampler from "
23
+ "opentelemetry.sdk.trace.sampling. AIQA tracing may not work correctly. "
24
+ "Please ensure opentelemetry-sdk>=1.24.0 is installed. "
25
+ "Try: pip install --upgrade opentelemetry-sdk"
26
+ )
27
+ # Set to None so we can check later
28
+ TraceIdRatioBased = None
29
+
30
+ from .aiqa_exporter import AIQASpanExporter
31
+
32
+ AIQA_TRACER_NAME = "aiqa-tracer"
33
+
34
+ client = {
35
+ "provider": None,
36
+ "exporter": None,
37
+ }
38
+
39
+ # Component tag to add to all spans (can be set via AIQA_COMPONENT_TAG env var or programmatically)
40
+ _component_tag: str = ""
41
+
42
+
43
+ def get_component_tag() -> str:
44
+ """Get the current component tag."""
45
+ return _component_tag
46
+
47
+
48
+ def set_component_tag(tag: str | None) -> None:
49
+ """Set the component tag programmatically (overrides environment variable)."""
50
+ global _component_tag
51
+ _component_tag = tag or ""
52
+
53
+
54
+ @lru_cache(maxsize=1)
55
+ def get_aiqa_client():
56
+ """
57
+ Initialize and return the AIQA client.
58
+
59
+ This function must be called before using any AIQA tracing functionality to ensure
60
+ that environment variables (such as AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG)
61
+ are properly loaded and the tracing system is initialized.
62
+
63
+ The function is idempotent - calling it multiple times is safe and will only
64
+ initialize once.
65
+
66
+ Example:
67
+ from aiqa import get_aiqa_client, WithTracing
68
+
69
+ # Initialize client (loads env vars)
70
+ get_aiqa_client()
71
+
72
+ @WithTracing
73
+ def my_function():
74
+ pass
75
+ """
76
+ global client
77
+ try:
78
+ _init_tracing()
79
+ except Exception as e:
80
+ logger.error(f"Failed to initialize AIQA tracing: {e}")
81
+ logger.warning("AIQA tracing is disabled. Your application will continue to run without tracing.")
82
+ # optionally return a richer client object; for now you just need init
83
+ return client
84
+
85
+ def _init_tracing():
86
+ """Initialize tracing system and load configuration from environment variables."""
87
+ try:
88
+ # Initialize component tag from environment variable
89
+ set_component_tag(os.getenv("AIQA_COMPONENT_TAG", None))
90
+
91
+ provider = trace.get_tracer_provider()
92
+
93
+ # Get sampling rate from environment (default: 1.0 = sample all)
94
+ sampling_rate = 1.0
95
+ if env_rate := os.getenv("AIQA_SAMPLING_RATE"):
96
+ try:
97
+ rate = float(env_rate)
98
+ sampling_rate = max(0.0, min(1.0, rate)) # Clamp to [0, 1]
99
+ except ValueError:
100
+ logger.warning(f"Invalid AIQA_SAMPLING_RATE value '{env_rate}', using default 1.0")
101
+
102
+ # If it's still the default proxy, install a real SDK provider
103
+ if not isinstance(provider, TracerProvider):
104
+ if TraceIdRatioBased is None:
105
+ raise ImportError(
106
+ "TraceIdRatioBased sampler is not available. "
107
+ "Please install opentelemetry-sdk>=1.24.0"
108
+ )
109
+
110
+ # Create sampler based on trace-id for deterministic sampling
111
+ sampler = TraceIdRatioBased(sampling_rate)
112
+ provider = TracerProvider(sampler=sampler)
113
+ trace.set_tracer_provider(provider)
114
+
115
+ # Idempotently add your processor
116
+ _attach_aiqa_processor(provider)
117
+ global client
118
+ client["provider"] = provider
119
+
120
+ # Log successful initialization
121
+ server_url = os.getenv("AIQA_SERVER_URL", "not configured")
122
+ logger.info(f"AIQA initialized and tracing (sampling rate: {sampling_rate:.2f}, server: {server_url})")
123
+
124
+ except Exception as e:
125
+ logger.error(f"Error initializing AIQA tracing: {e}")
126
+ raise
127
+
128
+ def _attach_aiqa_processor(provider: TracerProvider):
129
+ """Attach AIQA span processor to the provider. Idempotent - safe to call multiple times."""
130
+ try:
131
+ # Avoid double-adding if get_aiqa_client() is called multiple times
132
+ for p in provider._active_span_processor._span_processors:
133
+ if isinstance(getattr(p, "exporter", None), AIQASpanExporter):
134
+ logger.debug("AIQA span processor already attached, skipping")
135
+ return
136
+
137
+ exporter = AIQASpanExporter(
138
+ server_url=os.getenv("AIQA_SERVER_URL"),
139
+ api_key=os.getenv("AIQA_API_KEY"),
140
+ )
141
+ provider.add_span_processor(BatchSpanProcessor(exporter))
142
+ global client
143
+ client["exporter"] = exporter
144
+ logger.debug("AIQA span processor attached successfully")
145
+ except Exception as e:
146
+ logger.error(f"Error attaching AIQA span processor: {e}")
147
+ # Re-raise to let _init_tracing handle it - it will log and continue
148
+ raise
149
+
150
+
151
+ def get_aiqa_tracer():
152
+ """
153
+ Get the AIQA tracer with version from __init__.py __version__.
154
+ This should be used instead of trace.get_tracer() to ensure version is set.
155
+ """
156
+ try:
157
+ # Import here to avoid circular import
158
+ from . import __version__
159
+
160
+ # Compatibility: version parameter may not be supported in older OpenTelemetry versions
161
+ try:
162
+ # Try with version parameter (newer OpenTelemetry versions)
163
+ return trace.get_tracer(AIQA_TRACER_NAME, version=__version__)
164
+ except TypeError:
165
+ # Fall back to without version parameter (older versions)
166
+ return trace.get_tracer(AIQA_TRACER_NAME)
167
+ except Exception as e:
168
+ logger.error(f"Error getting AIQA tracer: {e}")
169
+ # Return a basic tracer as fallback to prevent crashes
170
+ return trace.get_tracer(AIQA_TRACER_NAME)