aiqa-client 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/__init__.py +44 -7
- aiqa/aiqa_exporter.py +219 -60
- aiqa/client.py +170 -0
- aiqa/experiment_runner.py +336 -0
- aiqa/object_serialiser.py +361 -0
- aiqa/test_experiment_runner.py +176 -0
- aiqa/test_tracing.py +230 -0
- aiqa/tracing.py +1102 -161
- {aiqa_client-0.1.1.dist-info → aiqa_client-0.1.2.dist-info}/METADATA +95 -4
- aiqa_client-0.1.2.dist-info/RECORD +14 -0
- aiqa_client-0.1.1.dist-info/RECORD +0 -9
- {aiqa_client-0.1.1.dist-info → aiqa_client-0.1.2.dist-info}/WHEEL +0 -0
- {aiqa_client-0.1.1.dist-info → aiqa_client-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {aiqa_client-0.1.1.dist-info → aiqa_client-0.1.2.dist-info}/top_level.txt +0 -0
aiqa/__init__.py
CHANGED
|
@@ -1,29 +1,66 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Python client for AIQA server - OpenTelemetry tracing decorators.
|
|
3
|
+
|
|
4
|
+
IMPORTANT: Before using any AIQA functionality, you must call get_aiqa_client() to initialize
|
|
5
|
+
the client and load environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG, etc.).
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from dotenv import load_dotenv
|
|
9
|
+
from aiqa import get_aiqa_client, WithTracing
|
|
10
|
+
|
|
11
|
+
# Load environment variables from .env file (if using one)
|
|
12
|
+
load_dotenv()
|
|
13
|
+
|
|
14
|
+
# Initialize client (must be called before using WithTracing or other functions)
|
|
15
|
+
get_aiqa_client()
|
|
16
|
+
|
|
17
|
+
@WithTracing
|
|
18
|
+
def my_function():
|
|
19
|
+
return "Hello, AIQA!"
|
|
3
20
|
"""
|
|
4
21
|
|
|
5
22
|
from .tracing import (
|
|
6
23
|
WithTracing,
|
|
7
|
-
|
|
24
|
+
flush_tracing,
|
|
8
25
|
shutdown_tracing,
|
|
9
26
|
set_span_attribute,
|
|
10
27
|
set_span_name,
|
|
11
28
|
get_active_span,
|
|
12
|
-
|
|
13
|
-
|
|
29
|
+
get_provider,
|
|
30
|
+
get_exporter,
|
|
31
|
+
get_trace_id,
|
|
32
|
+
get_span_id,
|
|
33
|
+
create_span_from_trace_id,
|
|
34
|
+
inject_trace_context,
|
|
35
|
+
extract_trace_context,
|
|
36
|
+
set_conversation_id,
|
|
37
|
+
set_component_tag,
|
|
38
|
+
get_span,
|
|
14
39
|
)
|
|
40
|
+
from .client import get_aiqa_client
|
|
41
|
+
from .experiment_runner import ExperimentRunner
|
|
15
42
|
|
|
16
|
-
__version__ = "0.1.1"
|
|
43
|
+
__version__ = "0.1.2"
|
|
17
44
|
|
|
18
45
|
__all__ = [
|
|
19
46
|
"WithTracing",
|
|
20
|
-
"
|
|
47
|
+
"flush_tracing",
|
|
21
48
|
"shutdown_tracing",
|
|
22
49
|
"set_span_attribute",
|
|
23
50
|
"set_span_name",
|
|
24
51
|
"get_active_span",
|
|
25
|
-
"
|
|
26
|
-
"
|
|
52
|
+
"get_provider",
|
|
53
|
+
"get_exporter",
|
|
54
|
+
"get_aiqa_client",
|
|
55
|
+
"ExperimentRunner",
|
|
56
|
+
"get_trace_id",
|
|
57
|
+
"get_span_id",
|
|
58
|
+
"create_span_from_trace_id",
|
|
59
|
+
"inject_trace_context",
|
|
60
|
+
"extract_trace_context",
|
|
61
|
+
"set_conversation_id",
|
|
62
|
+
"set_component_tag",
|
|
63
|
+
"get_span",
|
|
27
64
|
"__version__",
|
|
28
65
|
]
|
|
29
66
|
|
aiqa/aiqa_exporter.py
CHANGED
|
@@ -8,11 +8,12 @@ import json
|
|
|
8
8
|
import logging
|
|
9
9
|
import threading
|
|
10
10
|
import time
|
|
11
|
+
import io
|
|
11
12
|
from typing import List, Dict, Any, Optional
|
|
12
13
|
from opentelemetry.sdk.trace import ReadableSpan
|
|
13
14
|
from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
|
|
14
15
|
|
|
15
|
-
logger = logging.getLogger(
|
|
16
|
+
logger = logging.getLogger("AIQA")
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class AIQASpanExporter(SpanExporter):
|
|
@@ -39,6 +40,7 @@ class AIQASpanExporter(SpanExporter):
|
|
|
39
40
|
self._api_key = api_key
|
|
40
41
|
self.flush_interval_ms = flush_interval_seconds * 1000
|
|
41
42
|
self.buffer: List[Dict[str, Any]] = []
|
|
43
|
+
self.buffer_span_keys: set = set() # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
|
|
42
44
|
self.buffer_lock = threading.Lock()
|
|
43
45
|
self.flush_lock = threading.Lock()
|
|
44
46
|
self.shutdown_requested = False
|
|
@@ -61,21 +63,39 @@ class AIQASpanExporter(SpanExporter):
|
|
|
61
63
|
def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
|
|
62
64
|
"""
|
|
63
65
|
Export spans to the AIQA server. Adds spans to buffer for async flushing.
|
|
66
|
+
Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
|
|
64
67
|
"""
|
|
65
68
|
if not spans:
|
|
66
69
|
logger.debug("export() called with empty spans list")
|
|
67
70
|
return SpanExportResult.SUCCESS
|
|
68
|
-
|
|
69
|
-
# Serialize and add to buffer
|
|
71
|
+
logger.debug(f"AIQA export() called with {len(spans)} spans")
|
|
72
|
+
# Serialize and add to buffer, deduplicating by (traceId, spanId)
|
|
70
73
|
with self.buffer_lock:
|
|
71
|
-
serialized_spans = [
|
|
74
|
+
serialized_spans = []
|
|
75
|
+
duplicates_count = 0
|
|
76
|
+
for span in spans:
|
|
77
|
+
serialized = self._serialize_span(span)
|
|
78
|
+
span_key = (serialized["traceId"], serialized["spanId"])
|
|
79
|
+
if span_key not in self.buffer_span_keys:
|
|
80
|
+
serialized_spans.append(serialized)
|
|
81
|
+
self.buffer_span_keys.add(span_key)
|
|
82
|
+
else:
|
|
83
|
+
duplicates_count += 1
|
|
84
|
+
logger.debug(f"export() skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
|
|
85
|
+
|
|
72
86
|
self.buffer.extend(serialized_spans)
|
|
73
87
|
buffer_size = len(self.buffer)
|
|
74
88
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
89
|
+
if duplicates_count > 0:
|
|
90
|
+
logger.debug(
|
|
91
|
+
f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
|
|
92
|
+
f"Total buffered: {buffer_size}"
|
|
93
|
+
)
|
|
94
|
+
else:
|
|
95
|
+
logger.debug(
|
|
96
|
+
f"export() added {len(spans)} span(s) to buffer. "
|
|
97
|
+
f"Total buffered: {buffer_size}"
|
|
98
|
+
)
|
|
79
99
|
|
|
80
100
|
return SpanExportResult.SUCCESS
|
|
81
101
|
|
|
@@ -138,8 +158,8 @@ class AIQASpanExporter(SpanExporter):
|
|
|
138
158
|
"duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
|
|
139
159
|
"ended": span.end_time is not None,
|
|
140
160
|
"instrumentationLibrary": {
|
|
141
|
-
"name":
|
|
142
|
-
"version":
|
|
161
|
+
"name": self._get_instrumentation_name(),
|
|
162
|
+
"version": self._get_instrumentation_version(),
|
|
143
163
|
},
|
|
144
164
|
}
|
|
145
165
|
|
|
@@ -148,19 +168,93 @@ class AIQASpanExporter(SpanExporter):
|
|
|
148
168
|
seconds = int(nanoseconds // 1_000_000_000)
|
|
149
169
|
nanos = int(nanoseconds % 1_000_000_000)
|
|
150
170
|
return (seconds, nanos)
|
|
171
|
+
|
|
172
|
+
def _get_instrumentation_name(self) -> str:
|
|
173
|
+
"""Get instrumentation library name - always 'aiqa-tracer'."""
|
|
174
|
+
from .client import AIQA_TRACER_NAME
|
|
175
|
+
return AIQA_TRACER_NAME
|
|
176
|
+
|
|
177
|
+
def _get_instrumentation_version(self) -> Optional[str]:
|
|
178
|
+
"""Get instrumentation library version from __version__."""
|
|
179
|
+
try:
|
|
180
|
+
from . import __version__
|
|
181
|
+
return __version__
|
|
182
|
+
except (ImportError, AttributeError):
|
|
183
|
+
return None
|
|
184
|
+
|
|
185
|
+
def _build_request_headers(self) -> Dict[str, str]:
|
|
186
|
+
"""Build HTTP headers for span requests."""
|
|
187
|
+
headers = {"Content-Type": "application/json"}
|
|
188
|
+
if self.api_key:
|
|
189
|
+
headers["Authorization"] = f"ApiKey {self.api_key}"
|
|
190
|
+
return headers
|
|
191
|
+
|
|
192
|
+
def _get_span_url(self) -> str:
|
|
193
|
+
"""Get the URL for sending spans."""
|
|
194
|
+
if not self.server_url:
|
|
195
|
+
raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
|
|
196
|
+
return f"{self.server_url}/span"
|
|
197
|
+
|
|
198
|
+
def _is_interpreter_shutdown_error(self, error: Exception) -> bool:
|
|
199
|
+
"""Check if error is due to interpreter shutdown."""
|
|
200
|
+
error_str = str(error)
|
|
201
|
+
return "cannot schedule new futures after" in error_str or "interpreter shutdown" in error_str
|
|
202
|
+
|
|
203
|
+
def _extract_spans_from_buffer(self) -> List[Dict[str, Any]]:
|
|
204
|
+
"""Extract spans from buffer (thread-safe). Returns copy of buffer."""
|
|
205
|
+
with self.buffer_lock:
|
|
206
|
+
return self.buffer[:]
|
|
207
|
+
|
|
208
|
+
def _extract_and_remove_spans_from_buffer(self) -> List[Dict[str, Any]]:
|
|
209
|
+
"""
|
|
210
|
+
Atomically extract and remove all spans from buffer (thread-safe).
|
|
211
|
+
Returns the extracted spans. This prevents race conditions where spans
|
|
212
|
+
are added between extraction and clearing.
|
|
213
|
+
Note: Does NOT clear buffer_span_keys - that should be done after successful send
|
|
214
|
+
to avoid unnecessary clearing/rebuilding on failures.
|
|
215
|
+
"""
|
|
216
|
+
with self.buffer_lock:
|
|
217
|
+
spans = self.buffer[:]
|
|
218
|
+
self.buffer.clear()
|
|
219
|
+
return spans
|
|
220
|
+
|
|
221
|
+
def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
|
|
222
|
+
"""
|
|
223
|
+
Remove span keys from tracking set (thread-safe). Called after successful send.
|
|
224
|
+
"""
|
|
225
|
+
with self.buffer_lock:
|
|
226
|
+
for span in spans:
|
|
227
|
+
span_key = (span["traceId"], span["spanId"])
|
|
228
|
+
self.buffer_span_keys.discard(span_key)
|
|
229
|
+
|
|
230
|
+
def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
|
|
231
|
+
"""
|
|
232
|
+
Prepend spans back to buffer (thread-safe). Used to restore spans
|
|
233
|
+
if sending fails. Rebuilds the span keys tracking set.
|
|
234
|
+
"""
|
|
235
|
+
with self.buffer_lock:
|
|
236
|
+
self.buffer[:0] = spans
|
|
237
|
+
# Rebuild span keys set from current buffer contents
|
|
238
|
+
self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
|
|
239
|
+
|
|
240
|
+
def _clear_buffer(self) -> None:
|
|
241
|
+
"""Clear the buffer (thread-safe)."""
|
|
242
|
+
with self.buffer_lock:
|
|
243
|
+
self.buffer.clear()
|
|
244
|
+
self.buffer_span_keys.clear()
|
|
151
245
|
|
|
152
246
|
async def flush(self) -> None:
|
|
153
247
|
"""
|
|
154
248
|
Flush buffered spans to the server. Thread-safe: ensures only one flush operation runs at a time.
|
|
249
|
+
Atomically extracts spans to prevent race conditions with concurrent export() calls.
|
|
155
250
|
"""
|
|
156
251
|
logger.debug("flush() called - attempting to acquire flush lock")
|
|
157
252
|
with self.flush_lock:
|
|
158
253
|
logger.debug("flush() acquired flush lock")
|
|
159
|
-
#
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")
|
|
254
|
+
# Atomically extract and remove spans to prevent race conditions
|
|
255
|
+
# where export() adds spans between extraction and clearing
|
|
256
|
+
spans_to_flush = self._extract_and_remove_spans_from_buffer()
|
|
257
|
+
logger.debug(f"flush() extracted {len(spans_to_flush)} span(s) from buffer")
|
|
164
258
|
|
|
165
259
|
if not spans_to_flush:
|
|
166
260
|
logger.debug("flush() completed: no spans to flush")
|
|
@@ -171,14 +265,36 @@ class AIQASpanExporter(SpanExporter):
|
|
|
171
265
|
logger.warning(
|
|
172
266
|
f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
|
|
173
267
|
)
|
|
268
|
+
# Spans already removed from buffer, clear their keys to free memory
|
|
269
|
+
self._remove_span_keys_from_tracking(spans_to_flush)
|
|
174
270
|
return
|
|
175
271
|
|
|
176
272
|
logger.info(f"flush() sending {len(spans_to_flush)} span(s) to server")
|
|
177
273
|
try:
|
|
178
274
|
await self._send_spans(spans_to_flush)
|
|
179
275
|
logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
|
|
276
|
+
# Spans already removed from buffer during extraction
|
|
277
|
+
# Now clear their keys from tracking set to free memory
|
|
278
|
+
self._remove_span_keys_from_tracking(spans_to_flush)
|
|
279
|
+
except RuntimeError as error:
|
|
280
|
+
if self._is_interpreter_shutdown_error(error):
|
|
281
|
+
if self.shutdown_requested:
|
|
282
|
+
logger.debug(f"flush() skipped due to interpreter shutdown: {error}")
|
|
283
|
+
# Put spans back for retry with sync send during shutdown
|
|
284
|
+
self._prepend_spans_to_buffer(spans_to_flush)
|
|
285
|
+
else:
|
|
286
|
+
logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
|
|
287
|
+
# Put spans back for retry
|
|
288
|
+
self._prepend_spans_to_buffer(spans_to_flush)
|
|
289
|
+
raise
|
|
290
|
+
logger.error(f"Error flushing spans to server: {error}")
|
|
291
|
+
# Put spans back for retry
|
|
292
|
+
self._prepend_spans_to_buffer(spans_to_flush)
|
|
293
|
+
raise
|
|
180
294
|
except Exception as error:
|
|
181
|
-
logger.error(f"Error flushing spans to server: {error}"
|
|
295
|
+
logger.error(f"Error flushing spans to server: {error}")
|
|
296
|
+
# Put spans back for retry
|
|
297
|
+
self._prepend_spans_to_buffer(spans_to_flush)
|
|
182
298
|
if self.shutdown_requested:
|
|
183
299
|
raise
|
|
184
300
|
|
|
@@ -205,23 +321,23 @@ class AIQASpanExporter(SpanExporter):
|
|
|
205
321
|
logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
|
|
206
322
|
time.sleep(self.flush_interval_ms / 1000.0)
|
|
207
323
|
except Exception as e:
|
|
208
|
-
logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}"
|
|
324
|
+
logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
|
|
209
325
|
logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
|
|
210
326
|
time.sleep(self.flush_interval_ms / 1000.0)
|
|
211
327
|
|
|
212
328
|
logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")
|
|
213
329
|
|
|
214
|
-
#
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
logger.error(f"Error in final flush: {e}", exc_info=True)
|
|
222
|
-
finally:
|
|
330
|
+
# Don't do final flush here - shutdown() will handle it with synchronous send
|
|
331
|
+
# This avoids event loop shutdown issues
|
|
332
|
+
logger.debug("Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
|
|
333
|
+
|
|
334
|
+
# Close the event loop
|
|
335
|
+
try:
|
|
336
|
+
if not loop.is_closed():
|
|
223
337
|
loop.close()
|
|
224
|
-
|
|
338
|
+
logger.debug("Auto-flush worker thread event loop closed")
|
|
339
|
+
except Exception:
|
|
340
|
+
pass # Ignore errors during cleanup
|
|
225
341
|
|
|
226
342
|
flush_thread = threading.Thread(target=flush_worker, daemon=True, name="AIQA-AutoFlush")
|
|
227
343
|
flush_thread.start()
|
|
@@ -229,32 +345,25 @@ class AIQASpanExporter(SpanExporter):
|
|
|
229
345
|
logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
|
|
230
346
|
|
|
231
347
|
async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
|
|
232
|
-
"""Send spans to the server API."""
|
|
233
|
-
if not self.server_url:
|
|
234
|
-
raise ValueError("AIQA_SERVER_URL is not set. Cannot send spans to server.")
|
|
235
|
-
|
|
348
|
+
"""Send spans to the server API (async)."""
|
|
236
349
|
import aiohttp
|
|
237
350
|
|
|
238
|
-
url =
|
|
351
|
+
url = self._get_span_url()
|
|
352
|
+
headers = self._build_request_headers()
|
|
239
353
|
logger.debug(f"_send_spans() sending {len(spans)} spans to {url}")
|
|
240
|
-
|
|
241
|
-
headers = {
|
|
242
|
-
"Content-Type": "application/json",
|
|
243
|
-
}
|
|
244
354
|
if self.api_key:
|
|
245
|
-
headers["Authorization"] = f"ApiKey {self.api_key[:10]}..." # Log partial key for security
|
|
246
355
|
logger.debug("_send_spans() using API key authentication")
|
|
247
356
|
else:
|
|
248
357
|
logger.debug("_send_spans() no API key provided")
|
|
249
358
|
|
|
250
359
|
try:
|
|
360
|
+
# Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
|
|
361
|
+
json_bytes = json.dumps(spans).encode('utf-8')
|
|
362
|
+
data = io.BytesIO(json_bytes)
|
|
363
|
+
|
|
251
364
|
async with aiohttp.ClientSession() as session:
|
|
252
365
|
logger.debug(f"_send_spans() POST request starting to {url}")
|
|
253
|
-
async with session.post(
|
|
254
|
-
url,
|
|
255
|
-
json=spans,
|
|
256
|
-
headers=headers,
|
|
257
|
-
) as response:
|
|
366
|
+
async with session.post(url, data=data, headers=headers) as response:
|
|
258
367
|
logger.debug(f"_send_spans() received response: status={response.status}")
|
|
259
368
|
if not response.ok:
|
|
260
369
|
error_text = await response.text()
|
|
@@ -266,8 +375,46 @@ class AIQASpanExporter(SpanExporter):
|
|
|
266
375
|
f"Failed to send spans: {response.status} {response.reason} - {error_text}"
|
|
267
376
|
)
|
|
268
377
|
logger.debug(f"_send_spans() successfully sent {len(spans)} spans")
|
|
378
|
+
except RuntimeError as e:
|
|
379
|
+
if self._is_interpreter_shutdown_error(e):
|
|
380
|
+
if self.shutdown_requested:
|
|
381
|
+
logger.debug(f"_send_spans() skipped due to interpreter shutdown: {e}")
|
|
382
|
+
else:
|
|
383
|
+
logger.warning(f"_send_spans() interrupted by interpreter shutdown: {e}")
|
|
384
|
+
raise
|
|
385
|
+
logger.error(f"_send_spans() RuntimeError: {type(e).__name__}: {e}")
|
|
386
|
+
raise
|
|
269
387
|
except Exception as e:
|
|
270
|
-
logger.error(f"_send_spans() exception: {type(e).__name__}: {e}"
|
|
388
|
+
logger.error(f"_send_spans() exception: {type(e).__name__}: {e}")
|
|
389
|
+
raise
|
|
390
|
+
|
|
391
|
+
def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
|
|
392
|
+
"""Send spans to the server API (synchronous, for shutdown scenarios)."""
|
|
393
|
+
import requests
|
|
394
|
+
|
|
395
|
+
url = self._get_span_url()
|
|
396
|
+
headers = self._build_request_headers()
|
|
397
|
+
logger.debug(f"_send_spans_sync() sending {len(spans)} spans to {url}")
|
|
398
|
+
if self.api_key:
|
|
399
|
+
logger.debug("_send_spans_sync() using API key authentication")
|
|
400
|
+
else:
|
|
401
|
+
logger.debug("_send_spans_sync() no API key provided")
|
|
402
|
+
|
|
403
|
+
try:
|
|
404
|
+
response = requests.post(url, json=spans, headers=headers, timeout=10.0)
|
|
405
|
+
logger.debug(f"_send_spans_sync() received response: status={response.status_code}")
|
|
406
|
+
if not response.ok:
|
|
407
|
+
error_text = response.text[:200] if response.text else ""
|
|
408
|
+
logger.error(
|
|
409
|
+
f"_send_spans_sync() failed: status={response.status_code}, "
|
|
410
|
+
f"reason={response.reason}, error={error_text}"
|
|
411
|
+
)
|
|
412
|
+
raise Exception(
|
|
413
|
+
f"Failed to send spans: {response.status_code} {response.reason} - {error_text}"
|
|
414
|
+
)
|
|
415
|
+
logger.debug(f"_send_spans_sync() successfully sent {len(spans)} spans")
|
|
416
|
+
except Exception as e:
|
|
417
|
+
logger.error(f"_send_spans_sync() exception: {type(e).__name__}: {e}")
|
|
271
418
|
raise
|
|
272
419
|
|
|
273
420
|
def shutdown(self) -> None:
|
|
@@ -291,24 +438,36 @@ class AIQASpanExporter(SpanExporter):
|
|
|
291
438
|
else:
|
|
292
439
|
logger.debug("shutdown() no active auto-flush thread to wait for")
|
|
293
440
|
|
|
294
|
-
# Final flush attempt (synchronous)
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
441
|
+
# Final flush attempt (use synchronous send to avoid event loop issues)
|
|
442
|
+
with self.flush_lock:
|
|
443
|
+
logger.debug("shutdown() performing final flush with synchronous send")
|
|
444
|
+
# Atomically extract and remove spans to prevent race conditions
|
|
445
|
+
spans_to_flush = self._extract_and_remove_spans_from_buffer()
|
|
446
|
+
logger.debug(f"shutdown() extracted {len(spans_to_flush)} span(s) from buffer for final flush")
|
|
447
|
+
|
|
448
|
+
if spans_to_flush:
|
|
449
|
+
if not self.server_url:
|
|
450
|
+
logger.warning(
|
|
451
|
+
f"shutdown() skipping final flush: AIQA_SERVER_URL is not set. "
|
|
452
|
+
f"{len(spans_to_flush)} span(s) will not be sent."
|
|
453
|
+
)
|
|
454
|
+
# Spans already removed from buffer, clear their keys to free memory
|
|
455
|
+
self._remove_span_keys_from_tracking(spans_to_flush)
|
|
456
|
+
else:
|
|
457
|
+
logger.info(f"shutdown() sending {len(spans_to_flush)} span(s) to server (synchronous)")
|
|
458
|
+
try:
|
|
459
|
+
self._send_spans_sync(spans_to_flush)
|
|
460
|
+
logger.info(f"shutdown() successfully sent {len(spans_to_flush)} span(s) to server")
|
|
461
|
+
# Spans already removed from buffer during extraction
|
|
462
|
+
# Clear their keys from tracking set to free memory
|
|
463
|
+
self._remove_span_keys_from_tracking(spans_to_flush)
|
|
464
|
+
except Exception as e:
|
|
465
|
+
logger.error(f"shutdown() failed to send spans: {e}")
|
|
466
|
+
# Spans already removed, but process is exiting anyway
|
|
467
|
+
logger.warning(f"shutdown() {len(spans_to_flush)} span(s) were not sent due to error")
|
|
468
|
+
# Keys will remain in tracking set, but process is exiting so memory will be freed
|
|
305
469
|
else:
|
|
306
|
-
logger.debug("shutdown()
|
|
307
|
-
loop.run_until_complete(self.flush())
|
|
308
|
-
except RuntimeError:
|
|
309
|
-
# No event loop, create one
|
|
310
|
-
logger.debug("shutdown() no event loop found, creating new one for final flush")
|
|
311
|
-
asyncio.run(self.flush())
|
|
470
|
+
logger.debug("shutdown() no spans to flush")
|
|
312
471
|
|
|
313
472
|
# Check buffer state after shutdown
|
|
314
473
|
with self.buffer_lock:
|
aiqa/client.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# aiqa/client.py
|
|
2
|
+
import os
|
|
3
|
+
import logging
|
|
4
|
+
from functools import lru_cache
|
|
5
|
+
from opentelemetry import trace
|
|
6
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
7
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("AIQA")
|
|
10
|
+
|
|
11
|
+
# Compatibility import for TraceIdRatioBased sampler
|
|
12
|
+
# In older OpenTelemetry versions it was TraceIdRatioBasedSampler
|
|
13
|
+
# In newer versions (>=1.24.0) it's TraceIdRatioBased
|
|
14
|
+
TraceIdRatioBased = None
|
|
15
|
+
try:
|
|
16
|
+
from opentelemetry.sdk.trace.sampling import TraceIdRatioBased
|
|
17
|
+
except ImportError:
|
|
18
|
+
try:
|
|
19
|
+
from opentelemetry.sdk.trace.sampling import TraceIdRatioBasedSampler as TraceIdRatioBased
|
|
20
|
+
except ImportError:
|
|
21
|
+
logger.warning(
|
|
22
|
+
"Could not import TraceIdRatioBased or TraceIdRatioBasedSampler from "
|
|
23
|
+
"opentelemetry.sdk.trace.sampling. AIQA tracing may not work correctly. "
|
|
24
|
+
"Please ensure opentelemetry-sdk>=1.24.0 is installed. "
|
|
25
|
+
"Try: pip install --upgrade opentelemetry-sdk"
|
|
26
|
+
)
|
|
27
|
+
# Set to None so we can check later
|
|
28
|
+
TraceIdRatioBased = None
|
|
29
|
+
|
|
30
|
+
from .aiqa_exporter import AIQASpanExporter
|
|
31
|
+
|
|
32
|
+
AIQA_TRACER_NAME = "aiqa-tracer"
|
|
33
|
+
|
|
34
|
+
client = {
|
|
35
|
+
"provider": None,
|
|
36
|
+
"exporter": None,
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
# Component tag to add to all spans (can be set via AIQA_COMPONENT_TAG env var or programmatically)
|
|
40
|
+
_component_tag: str = ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_component_tag() -> str:
|
|
44
|
+
"""Get the current component tag."""
|
|
45
|
+
return _component_tag
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def set_component_tag(tag: str | None) -> None:
|
|
49
|
+
"""Set the component tag programmatically (overrides environment variable)."""
|
|
50
|
+
global _component_tag
|
|
51
|
+
_component_tag = tag or ""
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@lru_cache(maxsize=1)
|
|
55
|
+
def get_aiqa_client():
|
|
56
|
+
"""
|
|
57
|
+
Initialize and return the AIQA client.
|
|
58
|
+
|
|
59
|
+
This function must be called before using any AIQA tracing functionality to ensure
|
|
60
|
+
that environment variables (such as AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG)
|
|
61
|
+
are properly loaded and the tracing system is initialized.
|
|
62
|
+
|
|
63
|
+
The function is idempotent - calling it multiple times is safe and will only
|
|
64
|
+
initialize once.
|
|
65
|
+
|
|
66
|
+
Example:
|
|
67
|
+
from aiqa import get_aiqa_client, WithTracing
|
|
68
|
+
|
|
69
|
+
# Initialize client (loads env vars)
|
|
70
|
+
get_aiqa_client()
|
|
71
|
+
|
|
72
|
+
@WithTracing
|
|
73
|
+
def my_function():
|
|
74
|
+
pass
|
|
75
|
+
"""
|
|
76
|
+
global client
|
|
77
|
+
try:
|
|
78
|
+
_init_tracing()
|
|
79
|
+
except Exception as e:
|
|
80
|
+
logger.error(f"Failed to initialize AIQA tracing: {e}")
|
|
81
|
+
logger.warning("AIQA tracing is disabled. Your application will continue to run without tracing.")
|
|
82
|
+
# A richer client object could be returned here in future; for now callers only need initialization to have run
|
|
83
|
+
return client
|
|
84
|
+
|
|
85
|
+
def _init_tracing():
|
|
86
|
+
"""Initialize tracing system and load configuration from environment variables."""
|
|
87
|
+
try:
|
|
88
|
+
# Initialize component tag from environment variable
|
|
89
|
+
set_component_tag(os.getenv("AIQA_COMPONENT_TAG", None))
|
|
90
|
+
|
|
91
|
+
provider = trace.get_tracer_provider()
|
|
92
|
+
|
|
93
|
+
# Get sampling rate from environment (default: 1.0 = sample all)
|
|
94
|
+
sampling_rate = 1.0
|
|
95
|
+
if env_rate := os.getenv("AIQA_SAMPLING_RATE"):
|
|
96
|
+
try:
|
|
97
|
+
rate = float(env_rate)
|
|
98
|
+
sampling_rate = max(0.0, min(1.0, rate)) # Clamp to [0, 1]
|
|
99
|
+
except ValueError:
|
|
100
|
+
logger.warning(f"Invalid AIQA_SAMPLING_RATE value '{env_rate}', using default 1.0")
|
|
101
|
+
|
|
102
|
+
# If it's still the default proxy, install a real SDK provider
|
|
103
|
+
if not isinstance(provider, TracerProvider):
|
|
104
|
+
if TraceIdRatioBased is None:
|
|
105
|
+
raise ImportError(
|
|
106
|
+
"TraceIdRatioBased sampler is not available. "
|
|
107
|
+
"Please install opentelemetry-sdk>=1.24.0"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Create sampler based on trace-id for deterministic sampling
|
|
111
|
+
sampler = TraceIdRatioBased(sampling_rate)
|
|
112
|
+
provider = TracerProvider(sampler=sampler)
|
|
113
|
+
trace.set_tracer_provider(provider)
|
|
114
|
+
|
|
115
|
+
# Idempotently add your processor
|
|
116
|
+
_attach_aiqa_processor(provider)
|
|
117
|
+
global client
|
|
118
|
+
client["provider"] = provider
|
|
119
|
+
|
|
120
|
+
# Log successful initialization
|
|
121
|
+
server_url = os.getenv("AIQA_SERVER_URL", "not configured")
|
|
122
|
+
logger.info(f"AIQA initialized and tracing (sampling rate: {sampling_rate:.2f}, server: {server_url})")
|
|
123
|
+
|
|
124
|
+
except Exception as e:
|
|
125
|
+
logger.error(f"Error initializing AIQA tracing: {e}")
|
|
126
|
+
raise
|
|
127
|
+
|
|
128
|
+
def _attach_aiqa_processor(provider: TracerProvider):
|
|
129
|
+
"""Attach AIQA span processor to the provider. Idempotent - safe to call multiple times."""
|
|
130
|
+
try:
|
|
131
|
+
# Avoid double-adding if get_aiqa_client() is called multiple times
|
|
132
|
+
for p in provider._active_span_processor._span_processors:
|
|
133
|
+
if isinstance(getattr(p, "exporter", None), AIQASpanExporter):
|
|
134
|
+
logger.debug("AIQA span processor already attached, skipping")
|
|
135
|
+
return
|
|
136
|
+
|
|
137
|
+
exporter = AIQASpanExporter(
|
|
138
|
+
server_url=os.getenv("AIQA_SERVER_URL"),
|
|
139
|
+
api_key=os.getenv("AIQA_API_KEY"),
|
|
140
|
+
)
|
|
141
|
+
provider.add_span_processor(BatchSpanProcessor(exporter))
|
|
142
|
+
global client
|
|
143
|
+
client["exporter"] = exporter
|
|
144
|
+
logger.debug("AIQA span processor attached successfully")
|
|
145
|
+
except Exception as e:
|
|
146
|
+
logger.error(f"Error attaching AIQA span processor: {e}")
|
|
147
|
+
# Re-raise to let _init_tracing handle it - it will log and continue
|
|
148
|
+
raise
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_aiqa_tracer():
|
|
152
|
+
"""
|
|
153
|
+
Get the AIQA tracer with version from __init__.py __version__.
|
|
154
|
+
This should be used instead of trace.get_tracer() to ensure version is set.
|
|
155
|
+
"""
|
|
156
|
+
try:
|
|
157
|
+
# Import here to avoid circular import
|
|
158
|
+
from . import __version__
|
|
159
|
+
|
|
160
|
+
# Compatibility: version parameter may not be supported in older OpenTelemetry versions
|
|
161
|
+
try:
|
|
162
|
+
# Try with version parameter (newer OpenTelemetry versions)
|
|
163
|
+
return trace.get_tracer(AIQA_TRACER_NAME, version=__version__)
|
|
164
|
+
except TypeError:
|
|
165
|
+
# Fall back to without version parameter (older versions)
|
|
166
|
+
return trace.get_tracer(AIQA_TRACER_NAME)
|
|
167
|
+
except Exception as e:
|
|
168
|
+
logger.error(f"Error getting AIQA tracer: {e}")
|
|
169
|
+
# Return a basic tracer as fallback to prevent crashes
|
|
170
|
+
return trace.get_tracer(AIQA_TRACER_NAME)
|