aiqa-client 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/__init__.py CHANGED
@@ -1,29 +1,66 @@
 """
 Python client for AIQA server - OpenTelemetry tracing decorators.
+
+IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
+the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
+
+Example:
+    from dotenv import load_dotenv
+    from aiqa import get_aiqa_client, WithTracing
+
+    # Load environment variables from .env file (if using one)
+    load_dotenv()
+
+    # Initialize client (must be called before using WithTracing or other functions)
+    get_aiqa_client()
+
+    @WithTracing
+    def my_function():
+        return "Hello, AIQA!"
 """
 
 from .tracing import (
     WithTracing,
-    flush_spans,
+    flush_tracing,
     shutdown_tracing,
     set_span_attribute,
     set_span_name,
     get_active_span,
-    provider,
-    exporter,
+    get_provider,
+    get_exporter,
+    get_trace_id,
+    get_span_id,
+    create_span_from_trace_id,
+    inject_trace_context,
+    extract_trace_context,
+    set_conversation_id,
+    set_component_tag,
+    get_span,
 )
+from .client import get_aiqa_client
+from .experiment_runner import ExperimentRunner
 
-__version__ = "0.1.0"
+__version__ = "0.1.2"
 
 __all__ = [
     "WithTracing",
-    "flush_spans",
+    "flush_tracing",
     "shutdown_tracing",
     "set_span_attribute",
     "set_span_name",
     "get_active_span",
-    "provider",
-    "exporter",
+    "get_provider",
+    "get_exporter",
+    "get_aiqa_client",
+    "ExperimentRunner",
+    "get_trace_id",
+    "get_span_id",
+    "create_span_from_trace_id",
+    "inject_trace_context",
+    "extract_trace_context",
+    "set_conversation_id",
+    "set_component_tag",
+    "get_span",
     "__version__",
 ]
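For orientation, here is a usage sketch consistent with the new module docstring and the renamed exports (flush_tracing replaces flush_spans). The zero-argument calls to flush_tracing() and shutdown_tracing() are assumptions; this diff does not show their signatures.

from dotenv import load_dotenv
from aiqa import get_aiqa_client, WithTracing, flush_tracing, shutdown_tracing

load_dotenv()          # picks up AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG
get_aiqa_client()      # must run before any tracing decorator is used

@WithTracing
def handle_request():
    return "Hello, AIQA!"

handle_request()
flush_tracing()        # assumed zero-argument form; not confirmed by this diff
shutdown_tracing()     # send remaining spans before the process exits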
 
aiqa/aiqa_exporter.py CHANGED
@@ -8,11 +8,12 @@ import json
 import logging
 import threading
 import time
+import io
 from typing import List, Dict, Any, Optional
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("AIQA")
 
 
 class AIQASpanExporter(SpanExporter):
@@ -39,10 +40,16 @@ class AIQASpanExporter(SpanExporter):
         self._api_key = api_key
         self.flush_interval_ms = flush_interval_seconds * 1000
         self.buffer: List[Dict[str, Any]] = []
+        self.buffer_span_keys: set = set()  # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
         self.buffer_lock = threading.Lock()
         self.flush_lock = threading.Lock()
         self.shutdown_requested = False
         self.flush_timer: Optional[threading.Thread] = None
+
+        logger.info(
+            f"Initializing AIQASpanExporter: server_url={self.server_url or 'not set'}, "
+            f"flush_interval={flush_interval_seconds}s"
+        )
         self._start_auto_flush()
 
     @property
@@ -56,14 +63,39 @@ class AIQASpanExporter(SpanExporter):
     def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
         """
         Export spans to the AIQA server. Adds spans to buffer for async flushing.
+        Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
         """
         if not spans:
+            logger.debug("export() called with empty spans list")
             return SpanExportResult.SUCCESS
-
-        # Serialize and add to buffer
+        logger.debug(f"AIQA export() called with {len(spans)} spans")
+        # Serialize and add to buffer, deduplicating by (traceId, spanId)
         with self.buffer_lock:
-            serialized_spans = [self._serialize_span(span) for span in spans]
+            serialized_spans = []
+            duplicates_count = 0
+            for span in spans:
+                serialized = self._serialize_span(span)
+                span_key = (serialized["traceId"], serialized["spanId"])
+                if span_key not in self.buffer_span_keys:
+                    serialized_spans.append(serialized)
+                    self.buffer_span_keys.add(span_key)
+                else:
+                    duplicates_count += 1
+                    logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
+
             self.buffer.extend(serialized_spans)
+            buffer_size = len(self.buffer)
+
+        if duplicates_count > 0:
+            logger.debug(
+                f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
+                f"Total buffered: {buffer_size}"
+            )
+        else:
+            logger.debug(
+                f"export() added {len(spans)} span(s) to buffer. "
+                f"Total buffered: {buffer_size}"
+            )
 
         return SpanExportResult.SUCCESS
 
@@ -126,8 +158,8 @@ class AIQASpanExporter(SpanExporter):
             "duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
             "ended": span.end_time is not None,
             "instrumentationLibrary": {
-                "name": span.instrumentation_info.name if hasattr(span, "instrumentation_info") else "",
-                "version": span.instrumentation_info.version if hasattr(span, "instrumentation_info") else None,
+                "name": self._get_instrumentation_name(),
+                "version": self._get_instrumentation_version(),
             },
         }
 
@@ -136,18 +168,96 @@ class AIQASpanExporter(SpanExporter):
         seconds = int(nanoseconds // 1_000_000_000)
         nanos = int(nanoseconds % 1_000_000_000)
         return (seconds, nanos)
+
+    def _get_instrumentation_name(self) -> str:
+        """Get instrumentation library name - always 'aiqa-tracer'."""
+        from .client import AIQA_TRACER_NAME
+        return AIQA_TRACER_NAME
+
+    def _get_instrumentation_version(self) -> Optional[str]:
+        """Get instrumentation library version from __version__."""
+        try:
+            from . import __version__
+            return __version__
+        except (ImportError, AttributeError):
+            return None
+
+    def _build_request_headers(self) -> Dict[str, str]:
+        """Build HTTP headers for span requests."""
+        headers = {"Content-Type": "application/json"}
+        if self.api_key:
+            headers["Authorization"] = f"ApiKey {self.api_key}"
+        return headers
+
+    def _get_span_url(self) -> str:
+        """Get the URL for sending spans."""
+        if not self.server_url:
+            raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
+        return f"{self.server_url}/span"
+
+    def _is_interpreter_shutdown_error(self, error: Exception) -> bool:
+        """Check if error is due to interpreter shutdown."""
+        error_str = str(error)
+        return "cannot schedule new futures after" in error_str or "interpreter shutdown" in error_str
+
+    def _extract_spans_from_buffer(self) -> List[Dict[str, Any]]:
+        """Extract spans from buffer (thread-safe). Returns copy of buffer."""
+        with self.buffer_lock:
+            return self.buffer[:]
+
+    def _extract_and_remove_spans_from_buffer(self) -> List[Dict[str, Any]]:
+        """
+        Atomically extract and remove all spans from buffer (thread-safe).
+        Returns the extracted spans. This prevents race conditions where spans
+        are added between extraction and clearing.
+        Note: Does NOT clear buffer_span_keys - that should be done after successful send
+        to avoid unnecessary clearing/rebuilding on failures.
+        """
+        with self.buffer_lock:
+            spans = self.buffer[:]
+            self.buffer.clear()
+            return spans
+
+    def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
+        """
+        Remove span keys from tracking set (thread-safe). Called after successful send.
+        """
+        with self.buffer_lock:
+            for span in spans:
+                span_key = (span["traceId"], span["spanId"])
+                self.buffer_span_keys.discard(span_key)
+
+    def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
+        """
+        Prepend spans back to buffer (thread-safe). Used to restore spans
+        if sending fails. Rebuilds the span keys tracking set.
+        """
+        with self.buffer_lock:
+            self.buffer[:0] = spans
+            # Rebuild span keys set from current buffer contents
+            self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
+
+    def _clear_buffer(self) -> None:
+        """Clear the buffer (thread-safe)."""
+        with self.buffer_lock:
+            self.buffer.clear()
+            self.buffer_span_keys.clear()
 
     async def flush(self) -> None:
         """
         Flush buffered spans to the server. Thread-safe: ensures only one flush operation runs at a time.
+        Atomically extracts spans to prevent race conditions with concurrent export() calls.
         """
+        logger.debug("flush() called - attempting to acquire flush lock")
         with self.flush_lock:
-            # Get current buffer and clear it atomically
-            with self.buffer_lock:
-                spans_to_flush = self.buffer[:]
-                self.buffer.clear()
+            logger.debug("flush() acquired flush lock")
+            # Atomically extract and remove spans to prevent race conditions
+            # where export() adds spans between extraction and clearing
+            spans_to_flush = self._extract_and_remove_spans_from_buffer()
+            logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")
 
             if not spans_to_flush:
+                logger.debug("flush() completed: no spans to flush")
                 return
 
             # Skip sending if server URL is not configured
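The helper methods added above implement a drain-and-restore buffering scheme: flush() atomically empties the buffer under the lock, keeps the (traceId, spanId) keys tracked until a send succeeds, and prepends the spans back if it fails. Below is a minimal standalone sketch of that scheme, not the package's actual class; names such as SpanBuffer, drain, restore, and ack are illustrative only.

import threading
from typing import Any, Dict, List

class SpanBuffer:
    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._items: List[Dict[str, Any]] = []
        self._keys: set = set()  # (traceId, spanId) pairs currently tracked

    def add(self, span: Dict[str, Any]) -> bool:
        key = (span["traceId"], span["spanId"])
        with self._lock:
            if key in self._keys:  # duplicate export - drop it
                return False
            self._keys.add(key)
            self._items.append(span)
            return True

    def drain(self) -> List[Dict[str, Any]]:
        # Atomically take everything; keys stay tracked until the send succeeds
        with self._lock:
            items, self._items = self._items, []
            return items

    def restore(self, spans: List[Dict[str, Any]]) -> None:
        # Put failed spans back at the front and rebuild the key set for retry
        with self._lock:
            self._items[:0] = spans
            self._keys = {(s["traceId"], s["spanId"]) for s in self._items}

    def ack(self, spans: List[Dict[str, Any]]) -> None:
        # After a successful send, drop the keys to free memory; a later
        # re-export of the same span would no longer be detected as a duplicate
        with self._lock:
            for s in spans:
                self._keys.discard((s["traceId"], s["spanId"]))

In the exporter, flush() follows the same drain, send, acknowledge order, falling back to the restore step when the send raises.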
@@ -155,94 +265,217 @@ class AIQASpanExporter(SpanExporter):
                 logger.warning(
                     f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
                 )
+                # Spans already removed from buffer, clear their keys to free memory
+                self._remove_span_keys_from_tracking(spans_to_flush)
                 return
 
+            logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
             try:
                 await self._send_spans(spans_to_flush)
+                logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
+                # Spans already removed from buffer during extraction
+                # Now clear their keys from tracking set to free memory
+                self._remove_span_keys_from_tracking(spans_to_flush)
+            except RuntimeError as error:
+                if self._is_interpreter_shutdown_error(error):
+                    if self.shutdown_requested:
+                        logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
+                        # Put spans back for retry with sync send during shutdown
+                        self._prepend_spans_to_buffer(spans_to_flush)
+                    else:
+                        logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
+                        # Put spans back for retry
+                        self._prepend_spans_to_buffer(spans_to_flush)
+                    raise
+                logger.error(f"Error flushing spans to server: {error}")
+                # Put spans back for retry
+                self._prepend_spans_to_buffer(spans_to_flush)
+                raise
             except Exception as error:
                 logger.error(f"Error flushing spans to server: {error}")
+                # Put spans back for retry
+                self._prepend_spans_to_buffer(spans_to_flush)
                 if self.shutdown_requested:
                     raise
 
     def _start_auto_flush(self) -> None:
         """Start the auto-flush timer."""
         if self.shutdown_requested:
+            logger.warning("_start_auto_flush() called but shutdown already requested")
             return
 
+        logger.info(f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s")
+
         def flush_worker():
             import asyncio
+            logger.debug("Auto-flush worker thread started")
             loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)
 
+            cycle_count = 0
             while not self.shutdown_requested:
+                cycle_count += 1
+                logger.debug(f"Auto-flush cycle #{cycle_count} starting")
                 try:
                     loop.run_until_complete(self.flush())
+                    logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
                     time.sleep(self.flush_interval_ms / 1000.0)
                 except Exception as e:
-                    logger.error(f"Error in auto-flush: {e}")
+                    logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
+                    logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
                     time.sleep(self.flush_interval_ms / 1000.0)
 
-            # Final flush on shutdown
-            if self.shutdown_requested:
-                try:
-                    loop.run_until_complete(self.flush())
-                except Exception as e:
-                    logger.error(f"Error in final flush: {e}")
-                finally:
+            logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")
+
+            # Don't do final flush here - shutdown() will handle it with synchronous send
+            # This avoids event loop shutdown issues
+            logger.debug("Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
+
+            # Close the event loop
+            try:
+                if not loop.is_closed():
                     loop.close()
+                    logger.debug("Auto-flush worker thread event loop closed")
+            except Exception:
+                pass  # Ignore errors during cleanup
 
-        flush_thread = threading.Thread(target=flush_worker, daemon=True)
+        flush_thread = threading.Thread(target=flush_worker, daemon=True, name="AIQA-AutoFlush")
         flush_thread.start()
         self.flush_timer = flush_thread
+        logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
 
     async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
-        """Send spans to the server API."""
-        if not self.server_url:
-            raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
-
+        """Send spans to the server API (async)."""
         import aiohttp
 
-        logger.debug(f"Sending {len(spans)} spans to server: {self.server_url}")
+        url = self._get_span_url()
+        headers = self._build_request_headers()
+        logger.debug(f"_send_spans() sending {len(spans)} spans to {url}")
+        if self.api_key:
+            logger.debug("_send_spans() using API key authentication")
+        else:
+            logger.debug("_send_spans() no API key provided")
 
-        headers = {
-            "Content-Type": "application/json",
-        }
+        try:
+            # Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
+            json_bytes = json.dumps(spans).encode('utf-8')
+            data = io.BytesIO(json_bytes)
+
+            async with aiohttp.ClientSession() as session:
+                logger.debug(f"_send_spans() POST request starting to {url}")
+                async with session.post(url, data=data, headers=headers) as response:
+                    logger.debug(f"_send_spans() received response: status={response.status}")
+                    if not response.ok:
+                        error_text = await response.text()
+                        logger.error(
+                            f"_send_spans() failed: status={response.status}, "
+                            f"reason={response.reason}, error={error_text[:200]}"
+                        )
+                        raise Exception(
+                            f"Failed to send spans: {response.status} {response.reason} - {error_text}"
+                        )
+            logger.debug(f"_send_spans() successfully sent {len(spans)} spans")
+        except RuntimeError as e:
+            if self._is_interpreter_shutdown_error(e):
+                if self.shutdown_requested:
+                    logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
+                else:
+                    logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
+                raise
+            logger.error(f"_send_spans() RuntimeError: {type(e).__name__}: {e}")
+            raise
+        except Exception as e:
+            logger.error(f"_send_spans() exception: {type(e).__name__}: {e}")
+            raise
+
+    def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
+        """Send spans to the server API (synchronous, for shutdown scenarios)."""
+        import requests
+
+        url = self._get_span_url()
+        headers = self._build_request_headers()
+        logger.debug(f"_send_spans_sync() sending {len(spans)} spans to {url}")
         if self.api_key:
-            headers["Authorization"] = f"ApiKey {self.api_key}"
+            logger.debug("_send_spans_sync() using API key authentication")
+        else:
+            logger.debug("_send_spans_sync() no API key provided")
 
-        async with aiohttp.ClientSession() as session:
-            async with session.post(
-                f"{self.server_url}/span",
-                json=spans,
-                headers=headers,
-            ) as response:
-                if not response.ok:
-                    error_text = await response.text()
-                    raise Exception(
-                        f"Failed to send spans: {response.status} {response.reason} - {error_text}"
-                    )
+        try:
+            response = requests.post(url, json=spans, headers=headers, timeout=10.0)
+            logger.debug(f"_send_spans_sync() received response: status={response.status_code}")
+            if not response.ok:
+                error_text = response.text[:200] if response.text else ""
+                logger.error(
+                    f"_send_spans_sync() failed: status={response.status_code}, "
+                    f"reason={response.reason}, error={error_text}"
+                )
+                raise Exception(
+                    f"Failed to send spans: {response.status_code} {response.reason} - {error_text}"
+                )
+            logger.debug(f"_send_spans_sync() successfully sent {len(spans)} spans")
+        except Exception as e:
+            logger.error(f"_send_spans_sync() exception: {type(e).__name__}: {e}")
+            raise
 
     def shutdown(self) -> None:
         """Shutdown the exporter, flushing any remaining spans. Call before process exit."""
+        logger.info("shutdown() called - initiating exporter shutdown")
         self.shutdown_requested = True
 
+        # Check buffer state before shutdown
+        with self.buffer_lock:
+            buffer_size = len(self.buffer)
+        logger.info(f"shutdown() buffer contains {buffer_size} span(s) before shutdown")
+
         # Wait for flush thread to finish (it will do final flush)
         if self.flush_timer and self.flush_timer.is_alive():
+            logger.info("shutdown() waiting for auto-flush thread to complete (timeout=10s)")
             self.flush_timer.join(timeout=10.0)
+            if self.flush_timer.is_alive():
+                logger.warning("shutdown() auto-flush thread did not complete within timeout")
+            else:
+                logger.info("shutdown() auto-flush thread completed")
+        else:
+            logger.debug("shutdown() no active auto-flush thread to wait for")
 
-        # Final flush attempt (synchronous)
-        import asyncio
-        try:
-            loop = asyncio.get_event_loop()
-            if loop.is_running():
-                # If loop is running, schedule flush
-                import concurrent.futures
-                with concurrent.futures.ThreadPoolExecutor() as executor:
-                    future = executor.submit(asyncio.run, self.flush())
-                    future.result(timeout=10.0)
+        # Final flush attempt (use synchronous send to avoid event loop issues)
+        with self.flush_lock:
+            logger.debug("shutdown() performing final flush with synchronous send")
+            # Atomically extract and remove spans to prevent race conditions
+            spans_to_flush = self._extract_and_remove_spans_from_buffer()
+            logger.debug(f"shutdown() extracted {len(spans_to_flush)} span(s) from buffer for final flush")
+
+            if spans_to_flush:
+                if not self.server_url:
+                    logger.warning(
+                        f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
+                        f"{len(spans_to_flush)} span(s) will not be sent."
+                    )
+                    # Spans already removed from buffer, clear their keys to free memory
+                    self._remove_span_keys_from_tracking(spans_to_flush)
+                else:
+                    logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
+                    try:
+                        self._send_spans_sync(spans_to_flush)
+                        logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
+                        # Spans already removed from buffer during extraction
+                        # Clear their keys from tracking set to free memory
+                        self._remove_span_keys_from_tracking(spans_to_flush)
+                    except Exception as e:
+                        logger.error(f"shutdown() failed to send spans: {e}")
+                        # Spans already removed, but process is exiting anyway
+                        logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")
+                        # Keys will remain in tracking set, but process is exiting so memory will be freed
             else:
-                loop.run_until_complete(self.flush())
-        except RuntimeError:
-            # No event loop, create one
-            asyncio.run(self.flush())
+                logger.debug("shutdown() no spans to flush")
+
+        # Check buffer state after shutdown
+        with self.buffer_lock:
+            buffer_size = len(self.buffer)
+        if buffer_size > 0:
+            logger.warning(f"shutdown() buffer still contains {buffer_size} span(s) after shutdown")
+        else:
+            logger.info("shutdown() buffer is empty after shutdown")
+
+        logger.info("shutdown() completed")
 
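Since shutdown() now performs the final flush with a synchronous send, applications still need to make sure it runs before the interpreter exits. A hedged sketch using the package's exported shutdown_tracing; registering it with atexit and calling it with no arguments are assumptions not shown in this diff.

import atexit
from aiqa import get_aiqa_client, shutdown_tracing

get_aiqa_client()  # initialize tracing as described in aiqa/__init__.py

# Ensure buffered spans are sent synchronously when the process exits;
# shutdown_tracing() with no arguments is assumed here.
atexit.register(shutdown_tracing)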