aiqa-client 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/__init__.py CHANGED
@@ -1,22 +1,29 @@
  """
  Python client for AIQA server - OpenTelemetry tracing decorators.

- IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
- the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
+ Initialization is automatic - you don't need to call get_aiqa_client() explicitly.
+ The client initializes automatically when WithTracing is first used.
+
+ Set environment variables:
+ AIQA_SERVER_URL: URL of the AIQA server
+ AIQA_API_KEY: API key for authentication
+ AIQA_COMPONENT_TAG: Optional component identifier
+ AIQA_STARTUP_DELAY_SECONDS: Optional delay before first flush (default: 10s)

  Example:
  from dotenv import load_dotenv
- from aiqa import get_aiqa_client, WithTracing
+ from aiqa import WithTracing

  # Load environment variables from .env file (if using one)
  load_dotenv()

- # Initialize client (must be called before using WithTracing or other functions)
- get_aiqa_client()
-
+ # No explicit initialization needed - it happens automatically when used
  @WithTracing
  def my_function():
  return "Hello, AIQA!"
+
+ # Call the function - initialization happens on first use
+ result = my_function()
  """

  from .tracing import (
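For orientation, here is a minimal sketch of the usage pattern the updated docstring describes; the environment variable values and function body are placeholders, not part of the package.

```python
import os

# Placeholder values - point these at your own AIQA server and key
os.environ.setdefault("AIQA_SERVER_URL", "http://localhost:3000")
os.environ.setdefault("AIQA_API_KEY", "my-api-key")

from aiqa import WithTracing

@WithTracing
def my_function():
    # The client initializes itself the first time a traced function runs
    return "Hello, AIQA!"

result = my_function()
```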
aiqa/aiqa_exporter.py CHANGED
@@ -68,14 +68,18 @@ class AIQASpanExporter(SpanExporter):
  self.buffer_span_keys: set = set() # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
  self.buffer_lock = threading.Lock()
  self.flush_lock = threading.Lock()
+ # shutdown_requested is only set once (in shutdown()) and read many times
+ # No lock needed: worst case is reading stale False, which is acceptable
  self.shutdown_requested = False
  self.flush_timer: Optional[threading.Thread] = None
+ self._auto_flush_started = False
+ self._auto_flush_lock = threading.Lock() # Lock for lazy thread creation

  logger.info(
  f"Initializing AIQASpanExporter: server_url={self.server_url or 'not set'}, "
  f"flush_interval={flush_interval_seconds}s, startup_delay={startup_delay_seconds}s"
  )
- self._start_auto_flush()
+ # Don't start thread immediately - start lazily on first export to avoid startup issues

  @property
  def server_url(self) -> str:
@@ -106,6 +110,11 @@ class AIQASpanExporter(SpanExporter):
  pass

  logger.debug(f"AIQA export() called with {len(spans)} spans")
+
+ # Lazy initialization: start auto-flush thread on first export
+ # This avoids thread creation during initialization, which can cause issues in ECS deployments
+ self._ensure_auto_flush_started()
+
  # Serialize and add to buffer, deduplicating by (traceId, spanId)
  with self.buffer_lock:
  serialized_spans = []
@@ -342,6 +351,8 @@ class AIQASpanExporter(SpanExporter):
  """
  Flush buffered spans to the server. Thread-safe: ensures only one flush operation runs at a time.
  Atomically extracts spans to prevent race conditions with concurrent export() calls.
+
+ Lock ordering: flush_lock -> buffer_lock (must be consistent to avoid deadlocks)
  """
  logger.debug("flush() called - attempting to acquire flush lock")
  with self.flush_lock:
@@ -364,71 +375,88 @@ class AIQASpanExporter(SpanExporter):
  self._remove_span_keys_from_tracking(spans_to_flush)
  return

- logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
- try:
- await self._send_spans(spans_to_flush)
- logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
- # Spans already removed from buffer during extraction
- # Now clear their keys from tracking set to free memory
- self._remove_span_keys_from_tracking(spans_to_flush)
- except RuntimeError as error:
- if self._is_interpreter_shutdown_error(error):
- if self.shutdown_requested:
- logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
- # Put spans back for retry with sync send during shutdown
- self._prepend_spans_to_buffer(spans_to_flush)
- else:
- logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
- # Put spans back for retry
- self._prepend_spans_to_buffer(spans_to_flush)
- raise
- logger.error(f"Error flushing spans to server: {error}")
- # Put spans back for retry
+ # Release flush_lock before I/O to avoid blocking other flush attempts
+ # Spans are already extracted, so concurrent exports won't interfere
+ logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
+ try:
+ await self._send_spans(spans_to_flush)
+ logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
+ # Spans already removed from buffer during extraction
+ # Now clear their keys from tracking set to free memory
+ self._remove_span_keys_from_tracking(spans_to_flush)
+ except RuntimeError as error:
+ if self._is_interpreter_shutdown_error(error):
+ if self.shutdown_requested:
+ logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
+ else:
+ logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
+ # Put spans back for retry with sync send during shutdown
  self._prepend_spans_to_buffer(spans_to_flush)
  raise
- except Exception as error:
- logger.error(f"Error flushing spans to server: {error}")
- # Put spans back for retry
- self._prepend_spans_to_buffer(spans_to_flush)
- if self.shutdown_requested:
- raise
+ logger.error(f"Error flushing spans to server: {error}")
+ # Put spans back for retry
+ self._prepend_spans_to_buffer(spans_to_flush)
+ raise
+ except Exception as error:
+ logger.error(f"Error flushing spans to server: {error}")
+ # Put spans back for retry
+ self._prepend_spans_to_buffer(spans_to_flush)
+ if self.shutdown_requested:
+ raise

- def _start_auto_flush(self) -> None:
- """Start the auto-flush timer with startup delay."""
- if self.shutdown_requested:
- logger.warning("_start_auto_flush() called but shutdown already requested")
+ def _ensure_auto_flush_started(self) -> None:
+ """Ensure auto-flush thread is started (lazy initialization). Thread-safe."""
+ # Fast path: check without lock first
+ if self._auto_flush_started or self.shutdown_requested:
  return
-
- logger.info(
- f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s, "
- f"startup delay {self.startup_delay_seconds}s"
- )
-
- def flush_worker():
- import asyncio
- logger.debug("Auto-flush worker thread started")
+
+ # Slow path: acquire lock and double-check
+ with self._auto_flush_lock:
+ if self._auto_flush_started or self.shutdown_requested:
+ return

- # Wait for startup delay before beginning flush operations
- # This gives the container/application time to stabilize, which helps avoid startup issues (seen with AWS ECS, Dec 2025).
- if self.startup_delay_seconds > 0:
- logger.info(f"Auto-flush waiting {self.startup_delay_seconds}s before first flush (startup delay)")
- # Sleep in small increments to allow for early shutdown
- sleep_interval = 0.5
- remaining_delay = self.startup_delay_seconds
- while remaining_delay > 0 and not self.shutdown_requested:
- sleep_time = min(sleep_interval, remaining_delay)
- time.sleep(sleep_time)
- remaining_delay -= sleep_time
-
- if self.shutdown_requested:
- logger.debug("Auto-flush startup delay interrupted by shutdown")
- return
-
- logger.info("Auto-flush startup delay complete, beginning flush operations")
+ try:
+ self._start_auto_flush()
+ self._auto_flush_started = True
+ except Exception as e:
+ logger.error(f"Failed to start auto-flush thread: {e}", exc_info=True)
+ # Don't raise - allow spans to be buffered even if auto-flush fails
+ # They can still be flushed manually or on shutdown
+
+ def _flush_worker(self) -> None:
+ """Worker function for auto-flush thread. Runs in a separate thread with its own event loop."""
+ import asyncio
+ logger.debug("Auto-flush worker thread started")
+
+ # Wait for startup delay before beginning flush operations
+ # This gives the container/application time to stabilize, which helps avoid startup issues (seen with AWS ECS, Dec 2025).
+ if self.startup_delay_seconds > 0:
+ logger.info(f"Auto-flush waiting {self.startup_delay_seconds}s before first flush (startup delay)")
+ # Sleep in small increments to allow for early shutdown
+ sleep_interval = 0.5
+ remaining_delay = self.startup_delay_seconds
+ while remaining_delay > 0 and not self.shutdown_requested:
+ sleep_time = min(sleep_interval, remaining_delay)
+ time.sleep(sleep_time)
+ remaining_delay -= sleep_time

+ if self.shutdown_requested:
+ logger.debug("Auto-flush startup delay interrupted by shutdown")
+ return
+
+ logger.info("Auto-flush startup delay complete, beginning flush operations")
+
+ # Create event loop in this thread (isolated from main thread's event loop)
+ # This prevents interference with the main application's event loop
+ try:
  loop = asyncio.new_event_loop()
  asyncio.set_event_loop(loop)
-
+ except Exception as e:
+ logger.error(f"Failed to create event loop for auto-flush thread: {e}", exc_info=True)
+ return
+
+ # Ensure event loop is always closed, even if an exception occurs
+ try:
  cycle_count = 0
  while not self.shutdown_requested:
  cycle_count += 1
@@ -436,27 +464,39 @@ class AIQASpanExporter(SpanExporter):
  try:
  loop.run_until_complete(self.flush())
  logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
- time.sleep(self.flush_interval_ms / 1000.0)
  except Exception as e:
  logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
  logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
+
+ # Sleep after each cycle (including errors) to avoid tight loops
+ if not self.shutdown_requested:
  time.sleep(self.flush_interval_ms / 1000.0)

  logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")
-
  # Don't do final flush here - shutdown() will handle it with synchronous send
  # This avoids event loop shutdown issues
  logger.debug("Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
-
- # Close the event loop
+ finally:
+ # Always close the event loop, even if an exception occurs
  try:
  if not loop.is_closed():
  loop.close()
  logger.debug("Auto-flush worker thread event loop closed")
  except Exception:
  pass # Ignore errors during cleanup
+
+ def _start_auto_flush(self) -> None:
+ """Start the auto-flush timer with startup delay."""
+ if self.shutdown_requested:
+ logger.warning("_start_auto_flush() called but shutdown already requested")
+ return
+
+ logger.info(
+ f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s, "
+ f"startup delay {self.startup_delay_seconds}s"
+ )

- flush_thread = threading.Thread(target=flush_worker, daemon=True, name="AIQA-AutoFlush")
+ flush_thread = threading.Thread(target=self._flush_worker, daemon=True, name="AIQA-AutoFlush")
  flush_thread.start()
  self.flush_timer = flush_thread
  logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
@@ -585,7 +625,8 @@ class AIQASpanExporter(SpanExporter):
  logger.info(f"shutdown() buffer contains {buffer_size} span(s) before shutdown")

  # Wait for flush thread to finish (it will do final flush)
- if self.flush_timer and self.flush_timer.is_alive():
+ # Only wait if thread was actually started
+ if self._auto_flush_started and self.flush_timer and self.flush_timer.is_alive():
  logger.info("shutdown() waiting for auto-flush thread to complete (timeout=10s)")
  self.flush_timer.join(timeout=10.0)
  if self.flush_timer.is_alive():
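The lazy-start change above relies on a double-checked locking pattern: an unlocked fast-path read of a "started" flag, then a re-check under a dedicated lock before the worker thread is created. The following stripped-down sketch shows that pattern in isolation; the class and attribute names are illustrative, not the exporter's actual API.

```python
import threading

class LazyWorkerSketch:
    """Illustrative lock-guarded lazy thread start (not the real AIQASpanExporter)."""

    def __init__(self) -> None:
        self._started = False
        self._start_lock = threading.Lock()

    def _run(self) -> None:
        ...  # periodic work (e.g. flushing a buffer) would go here

    def ensure_started(self) -> None:
        # Fast path: once the flag is set, no locking is needed
        if self._started:
            return
        # Slow path: take the lock and re-check so only one caller starts the thread
        with self._start_lock:
            if self._started:
                return
            threading.Thread(target=self._run, daemon=True, name="lazy-worker").start()
            self._started = True
```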
aiqa/client.py CHANGED
@@ -129,9 +129,14 @@ def get_aiqa_client() -> AIQAClient:
  """
  Initialize and return the AIQA client singleton.

- This function must be called before using any AIQA tracing functionality to ensure
- that environment variables (such as AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG)
- are properly loaded and the tracing system is initialized.
+ This function is called automatically when WithTracing is first used, so you typically
+ don't need to call it explicitly. However, you can call it manually if you want to:
+ - Check if tracing is enabled (client.enabled)
+ - Initialize before the first @WithTracing usage
+ - Access the client object for advanced usage
+
+ The function loads environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG)
+ and initializes the tracing system.

  The client object manages the tracing system state. Tracing is done by the WithTracing
  decorator. Experiments are run by the ExperimentRunner class.
@@ -142,12 +147,14 @@ def get_aiqa_client() -> AIQAClient:
  Example:
  from aiqa import get_aiqa_client, WithTracing

- # Initialize client (loads env vars)
+ # Optional: Initialize explicitly (usually not needed)
  client = get_aiqa_client()
+ if client.enabled:
+ print("Tracing is enabled")

  @WithTracing
  def my_function():
- pass
+ pass # Initialization happens automatically here if not done above
  """
  global client
  try:
aiqa/constants.py CHANGED
@@ -3,4 +3,4 @@ Constants used across the AIQA client package.
  """

  AIQA_TRACER_NAME = "aiqa-tracer"
- VERSION = "0.4.0" # automatically updated by set-version-json.sh
+ VERSION = "0.4.1" # automatically updated by set-version-json.sh
aiqa/test_startup_reliability.py ADDED
@@ -0,0 +1,249 @@
+ """
+ Test startup reliability - simulates ECS deployment scenarios where rapid initialization
+ and network issues could cause deployment failures.
+
+ These tests verify that:
+ 1. Exporter initialization doesn't block or create threads immediately
+ 2. Thread creation is lazy (only on first export)
+ 3. Network failures during startup don't cause hangs
+ 4. Multiple rapid initializations don't cause issues
+ """
+
+ import os
+ import time
+ import threading
+ import pytest
+ from unittest.mock import patch, MagicMock
+ from opentelemetry.sdk.trace import TracerProvider
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+ from aiqa.client import get_aiqa_client, AIQAClient
+ from aiqa.aiqa_exporter import AIQASpanExporter
+
+
+ class TestStartupReliability:
+ """Tests for startup reliability in ECS-like scenarios."""
+
+ def test_exporter_initialization_does_not_create_thread_immediately(self):
+ """Verify that creating an exporter doesn't immediately start a thread."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ exporter = AIQASpanExporter(startup_delay_seconds=0.1)
+
+ # Thread should not be created immediately
+ assert exporter.flush_timer is None
+ assert not exporter._auto_flush_started
+
+ # Cleanup
+ exporter.shutdown()
+
+ def test_thread_created_lazily_on_first_export(self):
+ """Verify thread is only created when first span is exported."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ exporter = AIQASpanExporter(startup_delay_seconds=0.1)
+
+ # Thread should not exist yet
+ assert exporter.flush_timer is None
+
+ # Create a mock span and export it
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.trace import SpanContext, TraceFlags
+
+ mock_span = MagicMock(spec=ReadableSpan)
+ mock_span.get_span_context.return_value = SpanContext(
+ trace_id=1, span_id=1, is_remote=False, trace_flags=TraceFlags(0x01)
+ )
+ mock_span.name = "test_span"
+ mock_span.kind = 1
+ mock_span.start_time = 1000000000
+ mock_span.end_time = 2000000000
+ mock_span.status.status_code = 1
+ mock_span.attributes = {}
+ mock_span.links = []
+ mock_span.events = []
+ mock_span.resource.attributes = {}
+ mock_span.parent = None
+
+ # Export should trigger thread creation
+ result = exporter.export([mock_span])
+
+ # Give thread a moment to start
+ time.sleep(0.2)
+
+ # Now thread should exist
+ assert exporter._auto_flush_started
+ assert exporter.flush_timer is not None
+ assert exporter.flush_timer.is_alive()
+
+ # Cleanup
+ exporter.shutdown()
+ if exporter.flush_timer:
+ exporter.flush_timer.join(timeout=2.0)
+
+ def test_rapid_multiple_initializations(self):
+ """Test that multiple rapid initializations don't cause issues (simulates health checks)."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ # Simulate rapid health check calls
+ clients = []
+ for _ in range(10):
+ client = get_aiqa_client()
+ clients.append(client)
+ time.sleep(0.01) # Very short delay
+
+ # All should be the same singleton
+ assert all(c is clients[0] for c in clients)
+
+ # Should not have created multiple threads
+ if clients[0].exporter:
+ assert clients[0].exporter._auto_flush_started or clients[0].exporter.flush_timer is None
+
+ def test_initialization_with_unreachable_server(self):
+ """Test that initialization doesn't hang when server is unreachable."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://unreachable-server:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ # Should not block or raise
+ client = get_aiqa_client()
+ assert client is not None
+ assert client._initialized
+
+ # Exporter should exist but thread shouldn't be started yet
+ if client.exporter:
+ # Thread creation is lazy, so it might not exist
+ assert client.exporter.flush_timer is None or not client.exporter._auto_flush_started
+
+ def test_startup_delay_respected(self):
+ """Verify that startup delay prevents immediate flush attempts."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ exporter = AIQASpanExporter(startup_delay_seconds=0.5)
+
+ # Create and export a span to trigger thread creation
+ from opentelemetry.sdk.trace import ReadableSpan
+ from opentelemetry.trace import SpanContext, TraceFlags
+
+ mock_span = MagicMock(spec=ReadableSpan)
+ mock_span.get_span_context.return_value = SpanContext(
+ trace_id=1, span_id=1, is_remote=False, trace_flags=TraceFlags(0x01)
+ )
+ mock_span.name = "test_span"
+ mock_span.kind = 1
+ mock_span.start_time = 1000000000
+ mock_span.end_time = 2000000000
+ mock_span.status.status_code = 1
+ mock_span.attributes = {}
+ mock_span.links = []
+ mock_span.events = []
+ mock_span.resource.attributes = {}
+ mock_span.parent = None
+
+ exporter.export([mock_span])
+
+ # Thread should be created
+ time.sleep(0.1)
+ assert exporter._auto_flush_started
+
+ # But flush should not have happened yet (within delay period)
+ # We can't easily test this without mocking time, but we verify thread exists
+ assert exporter.flush_timer is not None
+
+ # Cleanup
+ exporter.shutdown()
+ if exporter.flush_timer:
+ exporter.flush_timer.join(timeout=2.0)
+
+ def test_concurrent_initialization(self):
+ """Test concurrent initialization from multiple threads (simulates ECS health checks)."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ clients = []
+ errors = []
+
+ def init_client():
+ try:
+ client = get_aiqa_client()
+ clients.append(client)
+ except Exception as e:
+ errors.append(e)
+
+ # Start multiple threads initializing simultaneously
+ threads = [threading.Thread(target=init_client) for _ in range(5)]
+ for t in threads:
+ t.start()
+ for t in threads:
+ t.join(timeout=5.0)
+
+ # Should have no errors
+ assert len(errors) == 0
+
+ # All should be the same singleton
+ assert len(set(id(c) for c in clients)) == 1
+
+ def test_shutdown_before_thread_starts(self):
+ """Test that shutdown works even if thread was never started."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ exporter = AIQASpanExporter(startup_delay_seconds=1.0)
+
+ # Thread should not exist
+ assert exporter.flush_timer is None
+
+ # Shutdown should work without errors
+ exporter.shutdown()
+
+ # Should still be able to call shutdown again
+ exporter.shutdown()
+
+ def test_initialization_timeout(self):
+ """Test that initialization completes quickly even with network issues."""
+ with patch.dict(
+ os.environ,
+ {
+ "AIQA_SERVER_URL": "http://localhost:3000",
+ "AIQA_API_KEY": "test-api-key",
+ },
+ ):
+ start_time = time.time()
+ client = get_aiqa_client()
+ elapsed = time.time() - start_time
+
+ # Initialization should be fast (< 1 second)
+ assert elapsed < 1.0
+ assert client is not None
+
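The new test module ships inside the package (see the RECORD entries below), so once the dev extras are installed it can be run like any other pytest file. A minimal invocation from Python might look like this; the relative path assumes you are running from a source checkout.

```python
import pytest

# Run only the startup-reliability tests; returns a non-zero exit code on failure
exit_code = pytest.main(["aiqa/test_startup_reliability.py", "-v"])
```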
aiqa/tracing.py CHANGED
@@ -590,7 +590,8 @@ def WithTracing(
  is_generator = inspect.isgeneratorfunction(fn)
  is_async_generator = inspect.isasyncgenfunction(fn) if hasattr(inspect, 'isasyncgenfunction') else False

- tracer = get_aiqa_tracer()
+ # Don't get tracer here - get it lazily when function is called
+ # This ensures initialization only happens when tracing is actually used

  def _setup_span(span: trace.Span, input_data: Any) -> bool:
  """Setup span with input data. Returns True if span is recording."""
@@ -627,10 +628,13 @@ def WithTracing(
  def _execute_with_span_sync(executor: Callable[[], Any], input_data: Any) -> Any:
  """Execute sync function within span context, handling input/output and exceptions."""
  # Ensure tracer provider is initialized before creating spans
+ # This is called lazily when the function runs, not at decorator definition time
  client = get_aiqa_client()
  if not client.enabled:
  return executor()

+ # Get tracer after initialization (lazy)
+ tracer = get_aiqa_tracer()
  with tracer.start_as_current_span(fn_name) as span:
  if not _setup_span(span, input_data):
  return executor()
@@ -646,10 +650,13 @@ def WithTracing(
  async def _execute_with_span_async(executor: Callable[[], Any], input_data: Any) -> Any:
  """Execute async function within span context, handling input/output and exceptions."""
  # Ensure tracer provider is initialized before creating spans
+ # This is called lazily when the function runs, not at decorator definition time
  client = get_aiqa_client()
  if not client.enabled:
  return await executor()

+ # Get tracer after initialization (lazy)
+ tracer = get_aiqa_tracer()
  with tracer.start_as_current_span(fn_name) as span:
  if not _setup_span(span, input_data):
  return await executor()
@@ -668,10 +675,13 @@ def WithTracing(
  def _execute_generator_sync(executor: Callable[[], Any], input_data: Any) -> Any:
  """Execute sync generator function, returning a traced generator."""
  # Ensure tracer provider is initialized before creating spans
+ # This is called lazily when the function runs, not at decorator definition time
  client = get_aiqa_client()
  if not client.enabled:
  return executor()

+ # Get tracer after initialization (lazy)
+ tracer = get_aiqa_tracer()
  # Create span but don't use 'with' - span will be closed by TracedGenerator
  span = tracer.start_span(fn_name)
  token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
@@ -694,10 +704,13 @@ def WithTracing(
  async def _execute_generator_async(executor: Callable[[], Any], input_data: Any) -> Any:
  """Execute async generator function, returning a traced async generator."""
  # Ensure tracer provider is initialized before creating spans
+ # This is called lazily when the function runs, not at decorator definition time
  client = get_aiqa_client()
  if not client.enabled:
  return await executor()

+ # Get tracer after initialization (lazy)
+ tracer = get_aiqa_tracer()
  # Create span but don't use 'with' - span will be closed by TracedAsyncGenerator
  span = tracer.start_span(fn_name)
  token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
@@ -935,7 +948,8 @@ def set_component_tag(tag: str) -> None:
  This can also be set via the AIQA_COMPONENT_TAG environment variable.
  The component tag allows you to identify which component/system generated the spans.

- Note: If using environment variables, ensure you call get_aiqa_client() first to initialize
+ Note: Initialization is automatic when WithTracing is first used. You can also call
+ get_aiqa_client() explicitly if needed.
  the client and load environment variables.

  Args:
@@ -1045,6 +1059,8 @@ def create_span_from_trace_id(
  from opentelemetry.trace import set_span_in_context
  parent_context = set_span_in_context(trace.NonRecordingSpan(parent_span_context))

+ # Ensure initialization before creating span
+ get_aiqa_client()
  # Start a new span in this context (it will be a child of the parent span)
  tracer = get_aiqa_tracer()
  span = tracer.start_span(span_name, context=parent_context)
@@ -1057,6 +1073,8 @@ def create_span_from_trace_id(
  return span
  except (ValueError, AttributeError) as e:
  logger.error(f"Error creating span from trace_id: {e}")
+ # Ensure initialization before creating span
+ get_aiqa_client()
  # Fallback: create a new span
  tracer = get_aiqa_tracer()
  span = tracer.start_span(span_name)
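The tracing.py hunks above all make the same move: the tracer is no longer captured when the decorator is applied, but resolved each time the wrapped function actually runs. The sketch below condenses that call-time pattern; it is a simplification of what WithTracing does, not its full implementation.

```python
import functools

from aiqa.client import get_aiqa_client
from aiqa.tracing import get_aiqa_tracer

def with_lazy_tracing(fn):
    """Illustrative decorator: nothing is initialized until the wrapped function is called."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        client = get_aiqa_client()   # triggers initialization on first use
        if not client.enabled:
            return fn(*args, **kwargs)
        tracer = get_aiqa_tracer()   # safe to fetch now that the provider exists
        with tracer.start_as_current_span(fn.__name__):
            return fn(*args, **kwargs)
    return wrapper
```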
aiqa_client-0.4.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aiqa-client
- Version: 0.4.0
+ Version: 0.4.1
  Summary: OpenTelemetry-based Python client for tracing functions and sending traces to the AIQA server
  Author-email: AIQA <info@aiqa.dev>
  License: MIT
@@ -56,6 +56,20 @@ pip install -r requirements.txt
  pip install -e .
  ```

+ ### Development Setup
+
+ For development, install with dev dependencies to run tests:
+
+ ```bash
+ pip install -e ".[dev]"
+ ```
+
+ Then run the unit tests:
+
+ ```bash
+ pytest
+ ```
+
  See [TESTING.md](TESTING.md) for detailed testing instructions.

  ## Setup
aiqa_client-0.4.1.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
+ aiqa/__init__.py,sha256=8MQBrnisjeYNrwrbTheUafEWS09GtIF7ff0fBZ1Jb24,1710
+ aiqa/aiqa_exporter.py,sha256=ge8DOebzewWA5AW2BH4cQ4eVARtZn7jPqpgZZBDIJR4,32565
+ aiqa/client.py,sha256=Vm6CA4q0vNbkLXwGCjx1Khfp6tyXxEFtIwZ31PMdrYU,9920
+ aiqa/constants.py,sha256=-FmvbNT2blwHn_dmoWiseSseFZP7ZCNJbkjvmZkdr4k,153
+ aiqa/experiment_runner.py,sha256=ZEDwECstAv4lWXpcdB9WSxfDQj43iqkGzB_YzoY933M,12053
+ aiqa/object_serialiser.py,sha256=pgcBVw5sZH8f7N6n3-qOvEcbNhuPS5yq7qdhaNT6Sks,15236
+ aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ aiqa/test_experiment_runner.py,sha256=LM8BuCrzBZL0Wyu_ierK0tNLsOUxxMTAHbAGW2G0qp0,5562
+ aiqa/test_startup_reliability.py,sha256=bt3fc-W3BPWoVK8RIYhnbwS-saBUwtPx90W57D7nOEM,9216
+ aiqa/test_tracing.py,sha256=mSVrhRQ6Dz5djlSUkCt097sIr84562w6E0BnuQDpMrI,8347
+ aiqa/tracing.py,sha256=gdC1aHH-GUIQDqNgAZsXNH8-sGBzlB1ij4R-D02uYXk,50758
+ aiqa_client-0.4.1.dist-info/licenses/LICENSE,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
+ aiqa_client-0.4.1.dist-info/METADATA,sha256=dRozyP6cybntCZwT29Z-4Du7wufive_AiuKDFy40IKY,7673
+ aiqa_client-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ aiqa_client-0.4.1.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
+ aiqa_client-0.4.1.dist-info/RECORD,,
aiqa_client-0.4.0.dist-info/RECORD REMOVED
@@ -1,15 +0,0 @@
- aiqa/__init__.py,sha256=MZGvPF4XM_EuBRiiAR6EA1bzctLpzmE6crcjuh3Ve6o,1459
- aiqa/aiqa_exporter.py,sha256=-yPJscH0Wc9yIVetwJiOAiwEqFQnL6AYXo_FwsoYGaE,30482
- aiqa/client.py,sha256=wYoVvOHoGnkc3qsEHL5vMRW13hOFPR2d9s_MPKGAbpE,9538
- aiqa/constants.py,sha256=hXRiXeNgAqLOizNeSgucSAMkFO0wGMtpZ2qjKhUWWhA,153
- aiqa/experiment_runner.py,sha256=ZEDwECstAv4lWXpcdB9WSxfDQj43iqkGzB_YzoY933M,12053
- aiqa/object_serialiser.py,sha256=pgcBVw5sZH8f7N6n3-qOvEcbNhuPS5yq7qdhaNT6Sks,15236
- aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- aiqa/test_experiment_runner.py,sha256=LM8BuCrzBZL0Wyu_ierK0tNLsOUxxMTAHbAGW2G0qp0,5562
- aiqa/test_tracing.py,sha256=mSVrhRQ6Dz5djlSUkCt097sIr84562w6E0BnuQDpMrI,8347
- aiqa/tracing.py,sha256=SsuK6WNgk3LbWt1aQwPPIDhitBmtyU6GOsMRvouXpDw,49706
- aiqa_client-0.4.0.dist-info/licenses/LICENSE,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
- aiqa_client-0.4.0.dist-info/METADATA,sha256=toA-KJzaC0mlWOsYhqHqj83ASP83sox_7wowMczaxrE,7505
- aiqa_client-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- aiqa_client-0.4.0.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
- aiqa_client-0.4.0.dist-info/RECORD,,