aiqa-client 0.4.7__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/__init__.py +1 -1
- aiqa/client.py +97 -15
- aiqa/constants.py +1 -1
- aiqa/http_utils.py +85 -11
- aiqa/tracing.py +48 -20
- {aiqa_client-0.4.7.dist-info → aiqa_client-0.5.2.dist-info}/METADATA +1 -1
- aiqa_client-0.5.2.dist-info/RECORD +14 -0
- aiqa/aiqa_exporter.py +0 -772
- aiqa_client-0.4.7.dist-info/RECORD +0 -15
- {aiqa_client-0.4.7.dist-info → aiqa_client-0.5.2.dist-info}/WHEEL +0 -0
- {aiqa_client-0.4.7.dist-info → aiqa_client-0.5.2.dist-info}/licenses/LICENSE.txt +0 -0
- {aiqa_client-0.4.7.dist-info → aiqa_client-0.5.2.dist-info}/top_level.txt +0 -0
aiqa/__init__.py
CHANGED

@@ -6,7 +6,7 @@ The client initializes automatically when WithTracing is first used.
 
 Set environment variables:
     AIQA_SERVER_URL: URL of the AIQA server
-    AIQA_API_KEY: API key for authentication
+    AIQA_API_KEY: API key for authentication (required)
    AIQA_COMPONENT_TAG: Optional component identifier
    AIQA_STARTUP_DELAY_SECONDS: Optional delay before first flush (default: 10s)
 
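With this change, AIQA_API_KEY is the only strictly required environment variable in 0.5.2; the server URL falls back to a documented default (see aiqa/http_utils.py below). A minimal configuration sketch in Python, with placeholder values:

    import os

    os.environ["AIQA_API_KEY"] = "your-api-key"      # required (placeholder value)
    os.environ["AIQA_COMPONENT_TAG"] = "my-service"  # optional component identifier
    # AIQA_SERVER_URL is optional; per http_utils.py it defaults to https://server-aiqa.winterwell.com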
aiqa/client.py
CHANGED

@@ -2,10 +2,12 @@
 import os
 import logging
 from functools import lru_cache
-from typing import Optional, TYPE_CHECKING, Any
+from typing import Optional, TYPE_CHECKING, Any, Dict
 from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+import requests
 
 from .constants import AIQA_TRACER_NAME, LOG_TAG
 
@@ -30,7 +32,7 @@ except ImportError:
     # Set to None so we can check later
     TraceIdRatioBased = None
 
-from .http_utils import get_server_url, get_api_key
+from .http_utils import get_server_url, get_api_key, build_headers, format_http_error
 
 class AIQAClient:
     """
@@ -100,14 +102,14 @@ class AIQAClient:
             self.enabled = False
             if self._provider:
                 self._provider.shutdown()
-
-            self._exporter.shutdown()
+            # OTLP exporter doesn't have a separate shutdown method - it's handled by the provider
         except Exception as e:
             logger.error(f"Error shutting down tracing: {e}")
             # Still disable even if shutdown had errors
             self.enabled = False
 
 
+
 # Global singleton instance
 client: AIQAClient = AIQAClient()
 
@@ -176,11 +178,10 @@ def _init_tracing() -> None:
     server_url = get_server_url()
     api_key = get_api_key()
 
-    if not
+    if not api_key:
         client.enabled = False
-        missing_vars = [var for var, val in [("AIQA_SERVER_URL", server_url), ("AIQA_API_KEY", api_key)] if not val]
         logger.warning(
-            f"AIQA tracing is disabled: missing required environment variables:
+            f"AIQA tracing is disabled: missing required environment variables: AIQA_API_KEY"
         )
         client._initialized = True
         return
@@ -227,20 +228,39 @@
 
 def _attach_aiqa_processor(provider: TracerProvider) -> None:
     """Attach AIQA span processor to the provider. Idempotent - safe to call multiple times."""
-    from .aiqa_exporter import AIQASpanExporter
-
     try:
         # Check if already attached
         for p in provider._active_span_processor._span_processors:
-            if isinstance(getattr(p, "exporter", None),
+            if isinstance(getattr(p, "exporter", None), OTLPSpanExporter):
                 logger.debug(f"AIQA span processor already attached, skipping")
                 return
 
-
-
-
-
+        server_url = get_server_url()
+        api_key = get_api_key()
+
+        # Build headers for authentication
+        # OTLP exporter sets its own Content-Type, so we only need Authorization
+        auth_headers = {}
+        if api_key:
+            auth_headers["Authorization"] = f"ApiKey {api_key}"
+        elif os.getenv("AIQA_API_KEY"):
+            auth_headers["Authorization"] = f"ApiKey {os.getenv('AIQA_API_KEY')}"
+
+        # OTLP HTTP exporter requires the full endpoint URL including /v1/traces
+        # Ensure server_url doesn't have trailing slash or /v1/traces, then append /v1/traces
+        base_url = server_url.rstrip('/')
+        if base_url.endswith('/v1/traces'):
+            endpoint = base_url
+        else:
+            endpoint = f"{base_url}/v1/traces"
+
+        # Create OTLP exporter with authentication headers only
+        # The exporter will set Content-Type and other headers automatically
+        exporter = OTLPSpanExporter(
+            endpoint=endpoint,
+            headers=auth_headers if auth_headers else None,
         )
+
         provider.add_span_processor(BatchSpanProcessor(exporter))
         global client
         client.exporter = exporter
@@ -266,4 +286,66 @@ def get_aiqa_tracer() -> trace.Tracer:
     except Exception as e:
         # Log issue but still return a tracer
         logger.info(f"Issue getting AIQA tracer with version: {e}, using fallback")
-        return trace.get_tracer(AIQA_TRACER_NAME)
+        return trace.get_tracer(AIQA_TRACER_NAME)
+
+
+def get_organisation(
+    organisation_id: str,
+    server_url: Optional[str] = None,
+    api_key: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Get organisation information based on API key via an API call.
+
+    Args:
+        organisation_id: ID of the organisation to retrieve
+        server_url: Optional server URL (defaults to AIQA_SERVER_URL env var)
+        api_key: Optional API key (defaults to AIQA_API_KEY env var)
+
+    Returns:
+        Organisation object as a dictionary
+    """
+    url = get_server_url(server_url)
+    key = get_api_key(api_key)
+    headers = build_headers(key)
+
+    response = requests.get(
+        f"{url}/organisation/{organisation_id}",
+        headers=headers,
+    )
+
+    if not response.ok:
+        raise Exception(format_http_error(response, "get organisation"))
+
+    return response.json()
+
+
+def get_api_key_info(
+    api_key_id: str,
+    server_url: Optional[str] = None,
+    api_key: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Get API key information via an API call.
+
+    Args:
+        api_key_id: ID of the API key to retrieve
+        server_url: Optional server URL (defaults to AIQA_SERVER_URL env var)
+        api_key: Optional API key (defaults to AIQA_API_KEY env var)
+
+    Returns:
+        ApiKey object as a dictionary
+    """
+    url = get_server_url(server_url)
+    key = get_api_key(api_key)
+    headers = build_headers(key)
+
+    response = requests.get(
+        f"{url}/api-key/{api_key_id}",
+        headers=headers,
+    )
+
+    if not response.ok:
+        raise Exception(format_http_error(response, "get api key info"))
+
+    return response.json()
aiqa/constants.py
CHANGED

@@ -3,6 +3,6 @@ Constants used across the AIQA client package.
 """
 
 AIQA_TRACER_NAME = "aiqa-tracer"
-VERSION = "0.4.7"
+VERSION = "0.5.2" # automatically updated by set-version-json.sh
 
 LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
aiqa/http_utils.py
CHANGED

@@ -1,6 +1,7 @@
 """
 Shared HTTP utilities for AIQA client.
 Provides common functions for building headers, handling errors, and accessing environment variables.
+Supports AIQA-specific env vars (AIQA_SERVER_URL, AIQA_API_KEY) with fallback to OTLP standard vars.
 """
 
 import os
@@ -11,17 +12,46 @@ def build_headers(api_key: Optional[str] = None) -> Dict[str, str]:
     """
     Build HTTP headers for AIQA API requests.
 
+    Checks AIQA_API_KEY first, then falls back to OTEL_EXPORTER_OTLP_HEADERS if not set.
+
     Args:
-        api_key: Optional API key. If not provided, will try to get from AIQA_API_KEY env var
+        api_key: Optional API key. If not provided, will try to get from AIQA_API_KEY env var,
+            then from OTEL_EXPORTER_OTLP_HEADERS.
 
     Returns:
-        Dictionary with Content-Type and optionally Authorization header.
+        Dictionary with Content-Type, Accept-Encoding, and optionally Authorization header.
     """
-    headers = {
+    headers = {
+        "Content-Type": "application/json",
+        "Accept-Encoding": "gzip, deflate, br", # Request compression (aiohttp handles decompression automatically)
+    }
+
+    # Check parameter first
     if api_key:
         headers["Authorization"] = f"ApiKey {api_key}"
-
-
+        return headers
+
+    # Check AIQA_API_KEY env var
+    aiqa_api_key = os.getenv("AIQA_API_KEY")
+    if aiqa_api_key:
+        headers["Authorization"] = f"ApiKey {aiqa_api_key}"
+        return headers
+
+    # Fallback to OTLP headers (format: "key1=value1,key2=value2")
+    otlp_headers = os.getenv("OTEL_EXPORTER_OTLP_HEADERS")
+    if otlp_headers:
+        # Parse comma-separated key=value pairs
+        for header_pair in otlp_headers.split(","):
+            header_pair = header_pair.strip()
+            if "=" in header_pair:
+                key, value = header_pair.split("=", 1)
+                key = key.strip()
+                value = value.strip()
+                if key.lower() == "authorization":
+                    headers["Authorization"] = value
+                else:
+                    headers[key] = value
+
     return headers
 
 
@@ -29,27 +59,71 @@ def get_server_url(server_url: Optional[str] = None) -> str:
     """
     Get server URL from parameter or environment variable, with trailing slash removed.
 
+    Checks AIQA_SERVER_URL first, then falls back to OTEL_EXPORTER_OTLP_ENDPOINT if not set.
+
     Args:
-        server_url: Optional server URL. If not provided, will get from AIQA_SERVER_URL env var
+        server_url: Optional server URL. If not provided, will get from AIQA_SERVER_URL env var,
+            then from OTEL_EXPORTER_OTLP_ENDPOINT.
 
     Returns:
-        Server URL with trailing slash removed
+        Server URL with trailing slash removed. Defaults to https://server-aiqa.winterwell.com if not set.
     """
-
-
+    # Check parameter first
+    if server_url:
+        return server_url.rstrip("/")
+
+    # Check AIQA_SERVER_URL env var
+    url = os.getenv("AIQA_SERVER_URL")
+    if url:
+        return url.rstrip("/")
+
+    # Fallback to OTLP endpoint
+    url = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
+    if url:
+        return url.rstrip("/")
+
+    # Default fallback
+    return "https://server-aiqa.winterwell.com"
 
 
 def get_api_key(api_key: Optional[str] = None) -> str:
     """
     Get API key from parameter or environment variable.
 
+    Checks AIQA_API_KEY first, then falls back to OTEL_EXPORTER_OTLP_HEADERS if not set.
+
     Args:
-        api_key: Optional API key. If not provided, will get from AIQA_API_KEY env var
+        api_key: Optional API key. If not provided, will get from AIQA_API_KEY env var,
+            then from OTEL_EXPORTER_OTLP_HEADERS (looking for Authorization header).
 
     Returns:
         API key or empty string if not set.
     """
-
+    # Check parameter first
+    if api_key:
+        return api_key
+
+    # Check AIQA_API_KEY env var
+    aiqa_api_key = os.getenv("AIQA_API_KEY")
+    if aiqa_api_key:
+        return aiqa_api_key
+
+    # Fallback to OTLP headers (look for Authorization header)
+    otlp_headers = os.getenv("OTEL_EXPORTER_OTLP_HEADERS")
+    if otlp_headers:
+        for header_pair in otlp_headers.split(","):
+            header_pair = header_pair.strip()
+            if "=" in header_pair:
+                key, value = header_pair.split("=", 1)
+                key = key.strip()
+                value = value.strip()
+                if key.lower() == "authorization":
+                    # Extract API key from "ApiKey <key>" or just return the value
+                    if value.startswith("ApiKey "):
+                        return value[7:]
+                    return value
+
+    return ""
 
 
 def format_http_error(response, operation: str) -> str:
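The fallback order added here is: explicit parameter, then the AIQA_* variables, then the standard OTLP variables. A hedged sketch of how the resolution behaves, using placeholder values:

    import os
    from aiqa.http_utils import get_server_url, get_api_key

    # Only the standard OTLP variables are set in this example
    os.environ.pop("AIQA_SERVER_URL", None)
    os.environ.pop("AIQA_API_KEY", None)
    os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://otel.example.com/"
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = "Authorization=ApiKey abc123"

    print(get_server_url())  # "https://otel.example.com" (trailing slash removed)
    print(get_api_key())     # "abc123" (extracted from the "ApiKey ..." Authorization value)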
aiqa/tracing.py
CHANGED

@@ -8,13 +8,13 @@ import logging
 import inspect
 import os
 import copy
+import requests
 from typing import Any, Callable, Optional, List
 from functools import wraps
 from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.trace import Status, StatusCode, SpanContext, TraceFlags
 from opentelemetry.propagate import inject, extract
-from .aiqa_exporter import AIQASpanExporter
 from .client import get_aiqa_client, get_component_tag, set_component_tag as _set_component_tag, get_aiqa_tracer
 from .constants import AIQA_TRACER_NAME, LOG_TAG
 from .object_serialiser import serialize_for_span
@@ -31,13 +31,11 @@ async def flush_tracing() -> None:
     if you want to flush immediately, e.g. before exiting a process.
     A common use is if you are tracing unit tests or experiment runs.
 
-    This flushes
+    This flushes the BatchSpanProcessor (OTLP exporter doesn't have a separate flush method).
     """
     client = get_aiqa_client()
     if client.provider:
         client.provider.force_flush() # Synchronous method
-    if client.exporter:
-        await client.exporter.flush()
 
 
 # Export provider and exporter accessors for advanced usage
@@ -117,12 +115,10 @@ class TracingOptions:
         self.filter_output = filter_output
 
 
-def _prepare_input(args: tuple, kwargs: dict) -> Any:
+def _prepare_input(args: tuple, kwargs: dict, sig: Optional[inspect.Signature] = None) -> Any:
     """Prepare input for span attributes.
-
-
-
-    For single-arg-dicts or kwargs-only, returns a shallow copy of the input data.
+    Converts args and kwargs into a unified dict structure using function signature when available.
+    Falls back to legacy behavior for functions without inspectable signatures.
 
     Note: This function does NOT serialize values - it just structures the data.
     Serialization happens later via serialize_for_span() to avoid double-encoding
@@ -130,6 +126,22 @@ def _prepare_input(args: tuple, kwargs: dict) -> Any:
     """
     if not args and not kwargs:
         return None
+
+    # Try to bind args to parameter names using function signature
+    if sig is not None:
+        try:
+            bound = sig.bind(*args, **kwargs)
+            bound.apply_defaults()
+            # Return dict of all arguments (positional args are now named)
+            result = bound.arguments.copy()
+            # Shallow copy to protect against mutating the input
+            return result
+        except (TypeError, ValueError):
+            # Binding failed (e.g., wrong number of args, *args/**kwargs issues)
+            # Fall through to legacy behavior
+            pass
+
+    # in case binding fails
     if not kwargs:
         if len(args) == 1:
             arg0 = args[0]
@@ -150,15 +162,17 @@ def _prepare_and_filter_input(
     kwargs: dict,
     filter_input: Optional[Callable[[Any], Any]],
     ignore_input: Optional[List[str]],
+    sig: Optional[inspect.Signature] = None,
 ) -> Any:
     """
     Prepare and filter input for span attributes - applies the user's filter_input and ignore_input.
-
+    Converts all args to a dict using function signature when available.
     """
     # Handle "self" in ignore_input by skipping the first argument
     filtered_args = args
     filtered_kwargs = kwargs.copy() if kwargs else {}
     filtered_ignore_input = ignore_input
+    filtered_sig = sig
     if ignore_input and "self" in ignore_input:
         # Remove "self" from ignore_input list (we'll handle it specially)
         filtered_ignore_input = [key for key in ignore_input if key != "self"]
@@ -168,8 +182,14 @@
         # Also remove "self" from kwargs if present
         if "self" in filtered_kwargs:
             del filtered_kwargs["self"]
-
-
+        # Adjust signature to remove "self" parameter if present
+        # This is needed because we removed self from args, so signature binding will fail otherwise
+        if filtered_sig is not None:
+            params = list(filtered_sig.parameters.values())
+            if params and params[0].name == "self":
+                filtered_sig = filtered_sig.replace(parameters=params[1:])
+    # turn args, kwargs into one "nice" object (now always a dict when signature is available)
+    input_data = _prepare_input(filtered_args, filtered_kwargs, filtered_sig)
     if filter_input and input_data is not None:
         input_data = filter_input(input_data)
     if filtered_ignore_input and len(filtered_ignore_input) > 0:
@@ -447,6 +467,15 @@ def WithTracing(
     is_generator = inspect.isgeneratorfunction(fn)
     is_async_generator = inspect.isasyncgenfunction(fn) if hasattr(inspect, 'isasyncgenfunction') else False
 
+    # Get function signature once at decoration time for efficient arg name resolution
+    fn_sig: Optional[inspect.Signature] = None
+    try:
+        fn_sig = inspect.signature(fn)
+    except (ValueError, TypeError):
+        # Some callables (e.g., builtins, C extensions) don't have inspectable signatures
+        # Will fall back to legacy behavior
+        pass
+
     # Don't get tracer here - get it lazily when function is called
     # This ensures initialization only happens when tracing is actually used
 
@@ -595,7 +624,7 @@ def WithTracing(
     if is_async_generator:
         @wraps(fn)
         async def async_gen_traced_fn(*args, **kwargs):
-            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input)
+            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input, fn_sig)
             return await _execute_generator_async(
                 lambda: fn(*args, **kwargs),
                 input_data
@@ -607,7 +636,7 @@ def WithTracing(
     elif is_generator:
         @wraps(fn)
         def gen_traced_fn(*args, **kwargs):
-            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input)
+            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input, fn_sig)
             return _execute_generator_sync(
                 lambda: fn(*args, **kwargs),
                 input_data
@@ -619,7 +648,7 @@ def WithTracing(
     elif is_async:
         @wraps(fn)
         async def async_traced_fn(*args, **kwargs):
-            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input)
+            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input, fn_sig)
             return await _execute_with_span_async(
                 lambda: fn(*args, **kwargs),
                 input_data
@@ -631,7 +660,7 @@ def WithTracing(
     else:
         @wraps(fn)
         def sync_traced_fn(*args, **kwargs):
-            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input)
+            input_data = _prepare_and_filter_input(args, kwargs, filter_input, ignore_input, fn_sig)
             return _execute_with_span_sync(
                 lambda: fn(*args, **kwargs),
                 input_data
@@ -678,6 +707,7 @@ def get_active_span() -> Optional[trace.Span]:
 
 def set_conversation_id(conversation_id: str) -> bool:
     """
+    Naturally a conversation might span several traces.
     Set the gen_ai.conversation.id attribute on the active span.
     This allows you to group multiple traces together that are part of the same conversation.
     See https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/ for more details.
@@ -1027,14 +1057,12 @@ def get_span(span_id: str, organisation_id: Optional[str] = None, exclude: Optio
         print(f"Found span: {span['name']}")
         my_function(**span['input'])
     """
-    import os
-    import requests
-
     server_url = get_server_url()
     api_key = get_api_key()
     org_id = organisation_id or os.getenv("AIQA_ORGANISATION_ID", "")
 
-    if not
+    # Check if server_url is the default (meaning AIQA_SERVER_URL was not set)
+    if not os.getenv("AIQA_SERVER_URL"):
         raise ValueError("AIQA_SERVER_URL is not set. Cannot retrieve span.")
     if not org_id:
         raise ValueError("Organisation ID is required. Provide it as parameter or set AIQA_ORGANISATION_ID environment variable.")
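The signature-binding change above maps positional arguments to their parameter names before they are recorded on a span. A standalone sketch of the underlying stdlib mechanism (the example function is hypothetical, not part of the package):

    import inspect

    def greet(name, punctuation="!"):
        return f"Hello {name}{punctuation}"

    sig = inspect.signature(greet)
    bound = sig.bind("Ada")       # positional arg is matched to its parameter name
    bound.apply_defaults()        # fills in punctuation="!"
    print(dict(bound.arguments))  # {'name': 'Ada', 'punctuation': '!'}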
aiqa_client-0.5.2.dist-info/RECORD
ADDED

@@ -0,0 +1,14 @@
+aiqa/__init__.py,sha256=V1VLfpxU_BXfkvKyhGckZsPYF43NJhoXeVX6FkeHr9g,1721
+aiqa/client.py,sha256=Ba3v-voBlTSCr-RU88INLXsF_5vqp42QiQWCFciSJbU,12542
+aiqa/constants.py,sha256=tZuh7XvKs6hFvWc-YnQ5Na6uogJMsRrMy-rWOauvcIA,226
+aiqa/experiment_runner.py,sha256=XAZsjVP70UH_QTk5ANSOQYAhmozuGXwKB5qWWHs-zeE,11186
+aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
+aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
+aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+aiqa/tracing.py,sha256=gdmtpvBVbvc_HrJHgtr78_XH9sIWRjUoEkXuCuNmuc0,45662
+aiqa/tracing_llm_utils.py,sha256=rNx6v6Wh_Mhv-_DPU9_aWS7YQcO46oiv0YPdBK1KVL8,9338
+aiqa_client-0.5.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
+aiqa_client-0.5.2.dist-info/METADATA,sha256=xMaQSnI3AiNE6lYs2vM6BV9VxQWMHXyDoIl6JXwdi3I,7705
+aiqa_client-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+aiqa_client-0.5.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
+aiqa_client-0.5.2.dist-info/RECORD,,
aiqa/aiqa_exporter.py
DELETED

@@ -1,772 +0,0 @@
-"""
-OpenTelemetry span exporter that sends spans to the AIQA server API.
-Buffers spans and flushes them periodically or on shutdown. Thread-safe.
-"""
-
-import os
-import json
-import logging
-import threading
-import time
-import io
-import asyncio
-from typing import List, Dict, Any, Optional
-from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult
-
-from .constants import AIQA_TRACER_NAME, VERSION, LOG_TAG
-from .http_utils import get_server_url, get_api_key, build_headers
-from .object_serialiser import toNumber
-
-logger = logging.getLogger(LOG_TAG)
-
-
-class AIQASpanExporter(SpanExporter):
-    """
-    Exports spans to AIQA server. Buffers spans and auto-flushes every flush_interval_seconds.
-    Call shutdown() before process exit to flush remaining spans.
-    """
-
-    def __init__(
-        self,
-        server_url: Optional[str] = None,
-        api_key: Optional[str] = None,
-        flush_interval_seconds: float = 5.0,
-        max_batch_size_bytes: int = 5 * 1024 * 1024, # 5MB default
-        max_buffer_spans: Optional[int] = None, # Maximum spans to buffer (prevents unbounded growth)
-        max_buffer_size_bytes: Optional[int] = None, # Maximum buffer size in bytes (prevents unbounded memory growth)
-        startup_delay_seconds: Optional[float] = None,
-    ):
-        """
-        Initialize the AIQA span exporter.
-
-        Args:
-            server_url: URL of the AIQA server (defaults to AIQA_SERVER_URL env var)
-            api_key: API key for authentication (defaults to AIQA_API_KEY env var)
-            flush_interval_seconds: How often to flush spans to the server
-            max_batch_size_bytes: Maximum size of a single batch in bytes (default: 5mb)
-            max_buffer_spans: Maximum spans to buffer (prevents unbounded growth).
-                Defaults to 10000, or AIQA_MAX_BUFFER_SPANS env var if set.
-            max_buffer_size_bytes: Maximum total buffer size in bytes (prevents unbounded memory growth).
-                Defaults to None (no limit), or AIQA_MAX_BUFFER_SIZE_BYTES env var if set.
-            startup_delay_seconds: Delay before starting auto-flush (default: 10s, or AIQA_STARTUP_DELAY_SECONDS env var)
-        """
-        self._server_url = get_server_url(server_url)
-        self._api_key = get_api_key(api_key)
-        self.flush_interval_ms = flush_interval_seconds * 1000
-        self.max_batch_size_bytes = max_batch_size_bytes
-
-        # Get max_buffer_spans from parameter, environment variable, or default
-        if not max_buffer_spans:
-            max_buffer_spans = toNumber(os.getenv("AIQA_MAX_BUFFER_SPANS")) or 10000
-        self.max_buffer_spans = max_buffer_spans
-
-        # Get max_buffer_size_bytes from parameter, environment variable, or default
-        if not max_buffer_size_bytes:
-            max_buffer_size_bytes = toNumber(os.getenv("AIQA_MAX_BUFFER_SIZE_BYTES")) or toNumber("100m")
-        self.max_buffer_size_bytes = max_buffer_size_bytes
-
-        # Get startup delay from parameter or environment variable (default: 10s)
-        if startup_delay_seconds is None:
-            env_delay = os.getenv("AIQA_STARTUP_DELAY_SECONDS")
-            if env_delay:
-                try:
-                    startup_delay_seconds = float(env_delay)
-                except ValueError:
-                    logger.warning(f"Invalid AIQA_STARTUP_DELAY_SECONDS value '{env_delay}', using default 10.0")
-                    startup_delay_seconds = 10.0
-            else:
-                startup_delay_seconds = 10.0
-        self.startup_delay_seconds = startup_delay_seconds
-
-        self.buffer: List[Dict[str, Any]] = []
-        self.buffer_span_keys: set = set() # Track (traceId, spanId) tuples to prevent duplicates (Python 3.8 compatible)
-        self.buffer_size_bytes: int = 0 # Track total size of buffered spans in bytes
-        # Cache span sizes to avoid recalculation (maps span_key -> size_bytes)
-        # Limited to max_buffer_spans * 2 to prevent unbounded growth
-        self._span_size_cache: Dict[tuple, int] = {}
-        self._max_cache_size = self.max_buffer_spans * 2 # Allow cache to be 2x buffer size
-        self.buffer_lock = threading.Lock()
-        self.flush_lock = threading.Lock()
-        # shutdown_requested is only set once (in shutdown()) and read many times
-        # No lock needed: worst case is reading stale False, which is acceptable
-        self.shutdown_requested = False
-        self.flush_timer: Optional[threading.Thread] = None
-        self._auto_flush_started = False
-        self._auto_flush_lock = threading.Lock() # Lock for lazy thread creation
-
-        logger.info(f"Initializing AIQASpanExporter: server_url={self._server_url or 'not set'}, "
-            f"flush_interval={flush_interval_seconds}s, startup_delay={startup_delay_seconds}s"
-        )
-        # Don't start thread immediately - start lazily on first export to avoid startup issues
-
-    def export(self, spans: List[ReadableSpan]) -> SpanExportResult:
-        """
-        Export spans to the AIQA server. Adds spans to buffer for async flushing.
-        Deduplicates spans based on (traceId, spanId) to prevent repeated exports.
-        Actual send is done by flush -> _send_spans, or shutdown -> _send_spans_sync
-        """
-        if not spans:
-            logger.debug(f"export: called with empty spans list")
-            return SpanExportResult.SUCCESS
-
-        # Check if AIQA tracing is enabled
-        try:
-            from .client import get_aiqa_client
-            client = get_aiqa_client()
-            if not client.enabled:
-                logger.debug(f"AIQA export: skipped: tracing is disabled")
-                return SpanExportResult.SUCCESS
-        except Exception:
-            # If we can't check enabled status, proceed (fail open)
-            pass
-
-        logger.debug(f"AIQA export() to buffer called with {len(spans)} spans")
-
-        # Lazy initialization: start auto-flush thread on first export
-        # This avoids thread creation during initialization, which can cause issues in ECS deployments
-        self._ensure_auto_flush_started()
-
-        # Serialize and add to buffer, deduplicating by (traceId, spanId)
-        with self.buffer_lock:
-            serialized_spans = []
-            serialized_sizes = [] # Track sizes of serialized spans
-            duplicates_count = 0
-            dropped_count = 0
-            dropped_memory_count = 0
-            flush_in_progress = self.flush_lock.locked()
-
-            for span in spans:
-                # Check if buffer is full by span count (prevent unbounded growth)
-                if len(self.buffer) >= self.max_buffer_spans:
-                    if flush_in_progress:
-                        # Flush in progress, drop this span
-                        dropped_count += 1
-                        continue
-                    # Flush not in progress, will trigger flush after adding spans
-                    # Continue processing remaining spans to add them before flush
-
-                serialized = self._serialize_span(span)
-                span_key = (serialized["traceId"], serialized["spanId"])
-                if span_key not in self.buffer_span_keys:
-                    # Estimate size of this span when serialized (cache for later use)
-                    span_size = self._get_span_size(span_key, serialized)
-
-                    # Check if buffer is full by memory size (prevent unbounded memory growth)
-                    if self.max_buffer_size_bytes is not None and self.buffer_size_bytes + span_size > self.max_buffer_size_bytes:
-                        if flush_in_progress:
-                            # Flush in progress, drop this span
-                            # Don't cache size for dropped spans to prevent memory leak
-                            dropped_memory_count += 1
-                            continue
-                        # Flush not in progress, will trigger flush after adding spans
-                        # Continue processing remaining spans to add them before flush
-
-                    serialized_spans.append(serialized)
-                    serialized_sizes.append(span_size)
-                    self.buffer_span_keys.add(span_key)
-                else:
-                    duplicates_count += 1
-                    logger.debug(f"export: skipping duplicate span: traceId={serialized['traceId']}, spanId={serialized['spanId']}")
-
-            # Add spans and update buffer size
-            self.buffer.extend(serialized_spans)
-            self.buffer_size_bytes += sum(serialized_sizes)
-            buffer_size = len(self.buffer)
-
-            # Check if thresholds are reached after adding spans
-            threshold_reached = self._check_thresholds_reached()
-
-            if dropped_count > 0:
-                logger.warning(f"WARNING: Buffer full ({buffer_size} spans), dropped {dropped_count} span(s) (flush in progress). "
-                    f"Consider increasing max_buffer_spans or fixing server connectivity."
-                )
-            if dropped_memory_count > 0:
-                logger.warning(f"WARNING: Buffer memory limit reached ({self.buffer_size_bytes} bytes / {self.max_buffer_size_bytes} bytes), "
-                    f"dropped {dropped_memory_count} span(s) (flush in progress). "
-                    f"Consider increasing AIQA_MAX_BUFFER_SIZE_BYTES or fixing server connectivity."
-                )
-
-            # Trigger immediate flush if threshold reached and flush not in progress
-            if threshold_reached and not flush_in_progress:
-                logger.info(f"Buffer threshold reached ({buffer_size} spans, {self.buffer_size_bytes} bytes), triggering immediate flush")
-                self._trigger_immediate_flush()
-
-            if duplicates_count > 0:
-                logger.debug(f"export() added {len(serialized_spans)} span(s) to buffer, skipped {duplicates_count} duplicate(s). "
-                    f"Total buffered: {buffer_size}"
-                )
-            else:
-                logger.debug(f"export() added {len(spans)} span(s) to buffer. "
-                    f"Total buffered: {buffer_size}"
-                )
-
-        return SpanExportResult.SUCCESS
-
-    def _serialize_span(self, span: ReadableSpan) -> Dict[str, Any]:
-        """Convert ReadableSpan to a serializable format."""
-        span_context = span.get_span_context()
-
-        # Get parent span ID
-        parent_span_id = None
-        if hasattr(span, "parent") and span.parent:
-            parent_span_id = format(span.parent.span_id, "016x")
-        elif hasattr(span, "parent_span_id") and span.parent_span_id:
-            parent_span_id = format(span.parent_span_id, "016x")
-
-        # Get span kind (handle both enum and int)
-        span_kind = span.kind
-        if hasattr(span_kind, "value"):
-            span_kind = span_kind.value
-
-        # Get status code (handle both enum and int)
-        status_code = span.status.status_code
-        if hasattr(status_code, "value"):
-            status_code = status_code.value
-
-        return {
-            "name": span.name,
-            "kind": span_kind,
-            "parentSpanId": parent_span_id,
-            "startTime": self._time_to_tuple(span.start_time),
-            "endTime": self._time_to_tuple(span.end_time) if span.end_time else None,
-            "status": {
-                "code": status_code,
-                "message": getattr(span.status, "description", None),
-            },
-            "attributes": dict(span.attributes) if span.attributes else {},
-            "links": [
-                {
-                    "context": {
-                        "traceId": format(link.context.trace_id, "032x"),
-                        "spanId": format(link.context.span_id, "016x"),
-                    },
-                    "attributes": dict(link.attributes) if link.attributes else {},
-                }
-                for link in (span.links or [])
-            ],
-            "events": [
-                {
-                    "name": event.name,
-                    "time": self._time_to_tuple(event.timestamp),
-                    "attributes": dict(event.attributes) if event.attributes else {},
-                }
-                for event in (span.events or [])
-            ],
-            "resource": {
-                "attributes": dict(span.resource.attributes) if span.resource.attributes else {},
-            },
-            "traceId": format(span_context.trace_id, "032x"),
-            "spanId": format(span_context.span_id, "016x"),
-            "traceFlags": span_context.trace_flags,
-            "duration": self._time_to_tuple(span.end_time - span.start_time) if span.end_time else None,
-            "ended": span.end_time is not None,
-            "instrumentationLibrary": self._get_instrumentation_library(span),
-        }
-
-    def _get_instrumentation_library(self, span: ReadableSpan) -> Dict[str, Any]:
-        """
-        Get instrumentation library information from the span: just use the package version.
-        """
-        return {
-            "name": AIQA_TRACER_NAME,
-            "version": VERSION,
-        }
-
-    def _time_to_tuple(self, nanoseconds: int) -> tuple:
-        """Convert nanoseconds to (seconds, nanoseconds) tuple."""
-        seconds = int(nanoseconds // 1_000_000_000)
-        nanos = int(nanoseconds % 1_000_000_000)
-        return (seconds, nanos)
-
-    def _get_span_size(self, span_key: tuple, serialized: Dict[str, Any]) -> int:
-        """
-        Get span size from cache or calculate and cache it.
-        Thread-safe when called within buffer_lock.
-        Limits cache size to prevent unbounded memory growth.
-        """
-        if span_key in self._span_size_cache:
-            return self._span_size_cache[span_key]
-        span_json = json.dumps(serialized)
-        span_size = len(span_json.encode('utf-8'))
-        # Only cache if we have valid keys and cache isn't too large
-        if span_key[0] and span_key[1] and len(self._span_size_cache) < self._max_cache_size:
-            self._span_size_cache[span_key] = span_size
-        return span_size
-
-    def _check_thresholds_reached(self) -> bool:
-        """Check if buffer thresholds are reached. Must be called within buffer_lock."""
-        if len(self.buffer) >= self.max_buffer_spans:
-            return True
-        if self.max_buffer_size_bytes is not None and self.buffer_size_bytes >= self.max_buffer_size_bytes:
-            return True
-        return False
-
-    def _build_request_headers(self) -> Dict[str, str]:
-        """Build HTTP headers for span requests."""
-        return build_headers(self._api_key)
-
-    def _get_span_url(self) -> str:
-        return f"{self._server_url}/span"
-
-    def _is_interpreter_shutdown_error(self, error: Exception) -> bool:
-        """Check if error is due to interpreter shutdown."""
-        error_str = str(error)
-        return "cannot schedule new futures after" in error_str or "interpreter shutdown" in error_str
-
-    def _extract_spans_from_buffer(self) -> List[Dict[str, Any]]:
-        """Extract spans from buffer (thread-safe). Returns copy of buffer."""
-        with self.buffer_lock:
-            return self.buffer[:]
-
-    def _extract_and_remove_spans_from_buffer(self) -> List[Dict[str, Any]]:
-        """
-        Atomically extract and remove all spans from buffer (thread-safe).
-        Returns the extracted spans. This prevents race conditions where spans
-        are added between extraction and clearing.
-        Note: Does NOT clear buffer_span_keys - that should be done after successful send
-        to avoid unnecessary clearing/rebuilding on failures.
-        Also resets buffer_size_bytes to 0.
-        """
-        with self.buffer_lock:
-            spans = self.buffer[:]
-            self.buffer.clear()
-            self.buffer_size_bytes = 0
-            return spans
-
-    def _remove_span_keys_from_tracking(self, spans: List[Dict[str, Any]]) -> None:
-        """
-        Remove span keys from tracking set and size cache (thread-safe). Called after successful send.
-        """
-        with self.buffer_lock:
-            for span in spans:
-                span_key = (span["traceId"], span["spanId"])
-                self.buffer_span_keys.discard(span_key)
-                # Also remove from size cache to free memory
-                self._span_size_cache.pop(span_key, None)
-
-    def _prepend_spans_to_buffer(self, spans: List[Dict[str, Any]]) -> None:
-        """
-        Prepend spans back to buffer (thread-safe). Used to restore spans
-        if sending fails. Rebuilds the span keys tracking set and buffer size.
-        Uses cached sizes when available to avoid re-serialization.
-        """
-        with self.buffer_lock:
-            self.buffer[:0] = spans
-            # Rebuild span keys set from current buffer contents
-            self.buffer_span_keys = {(span["traceId"], span["spanId"]) for span in self.buffer}
-            # Recalculate buffer size using cache when available
-            total_size = 0
-            for span in self.buffer:
-                span_key = (span.get("traceId"), span.get("spanId"))
-                total_size += self._get_span_size(span_key, span)
-            self.buffer_size_bytes = total_size
-
-    def _clear_buffer(self) -> None:
-        """Clear the buffer (thread-safe)."""
-        with self.buffer_lock:
-            self.buffer.clear()
-            self.buffer_span_keys.clear()
-            self.buffer_size_bytes = 0
-            self._span_size_cache.clear()
-
-    def _split_into_batches(self, spans: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
-        """
-        Split spans into batches based on max_batch_size_bytes.
-        Each batch will be as large as possible without exceeding the limit.
-        If a single span exceeds the limit, it will be sent in its own batch with a warning.
-        """
-        if not spans:
-            return []
-
-        batches = []
-        current_batch = []
-        current_batch_size = 0
-
-        for span in spans:
-            # Get size from cache if available, otherwise calculate it
-            span_key = (span.get("traceId"), span.get("spanId"))
-            span_size = self._get_span_size(span_key, span)
-
-            # Check if this single span exceeds the limit
-            if span_size > self.max_batch_size_bytes:
-                # If we have a current batch, save it first
-                if current_batch:
-                    batches.append(current_batch)
-                    current_batch = []
-                    current_batch_size = 0
-
-                # Log warning about oversized span
-                span_name = span.get('name', 'unknown')
-                span_trace_id = span.get('traceId', 'unknown')
-                logger.warning(f"Span \'{span_name}' (traceId={span_trace_id}) exceeds max_batch_size_bytes "
-                    f"({span_size} bytes > {self.max_batch_size_bytes} bytes). "
-                    f"Will attempt to send it anyway - may fail if server/nginx limit is exceeded."
-                )
-                # Still create a batch with just this span - we'll try to send it
-                batches.append([span])
-                continue
-
-            # If adding this span would exceed the limit, start a new batch
-            if current_batch and current_batch_size + span_size > self.max_batch_size_bytes:
-                batches.append(current_batch)
-                current_batch = []
-                current_batch_size = 0
-
-            current_batch.append(span)
-            current_batch_size += span_size
-
-        # Add the last batch if it has any spans
-        if current_batch:
-            batches.append(current_batch)
-
-        return batches
-
-    async def flush(self) -> None:
-        """
-        Flush buffered spans to the server. Thread-safe: ensures only one flush operation runs at a time.
-        Atomically extracts spans to prevent race conditions with concurrent export() calls.
-
-        Lock ordering: flush_lock -> buffer_lock (must be consistent to avoid deadlocks)
-        """
-        logger.debug(f"flush: called - attempting to acquire flush lock")
-        with self.flush_lock:
-            logger.debug(f"flush() acquired flush lock")
-            # Atomically extract and remove spans to prevent race conditions
-            # where export() adds spans between extraction and clearing
-            spans_to_flush = self._extract_and_remove_spans_from_buffer()
-            logger.debug(f"flush: extracted {len(spans_to_flush)} span(s) from buffer")
-
-            if not spans_to_flush:
-                logger.debug(f"flush() completed: no spans to flush")
-                return
-
-            # Skip sending if server URL is not configured
-            if not self._server_url:
-                logger.warning(f"Skipping flush: AIQA_SERVER_URL is not set. {len(spans_to_flush)} span(s) will not be sent."
-                )
-                # Spans already removed from buffer, clear their keys to free memory
-                self._remove_span_keys_from_tracking(spans_to_flush)
-                return
-
-            # Release flush_lock before I/O to avoid blocking other flush attempts
-            # Spans are already extracted, so concurrent exports won't interfere
-            logger.info(f"flush: sending {len(spans_to_flush)} span(s) to server")
-            try:
-                await self._send_spans(spans_to_flush)
-                logger.info(f"flush() successfully sent {len(spans_to_flush)} span(s) to server")
-                # Spans already removed from buffer during extraction
-                # Now clear their keys from tracking set to free memory
-                self._remove_span_keys_from_tracking(spans_to_flush)
-            except RuntimeError as error:
-                if self._is_interpreter_shutdown_error(error):
-                    if self.shutdown_requested:
-                        logger.debug(f"flush: skipped due to interpreter shutdown: {error}")
-                    else:
-                        logger.warning(f"flush() interrupted by interpreter shutdown: {error}")
-                        # Put spans back for retry with sync send during shutdown
-                        self._prepend_spans_to_buffer(spans_to_flush)
-                    raise
-                logger.error(f"Error flushing spans to server: {error}")
-                # Put spans back for retry
-                self._prepend_spans_to_buffer(spans_to_flush)
-                raise
-            except Exception as error:
-                logger.error(f"Error flushing spans to server: {error}")
-                # Put spans back for retry
-                self._prepend_spans_to_buffer(spans_to_flush)
-                if self.shutdown_requested:
-                    raise
-
-    def _ensure_auto_flush_started(self) -> None:
-        """Ensure auto-flush thread is started (lazy initialization). Thread-safe."""
-        # Fast path: check without lock first
-        if self._auto_flush_started or self.shutdown_requested:
-            return
-
-        # Slow path: acquire lock and double-check
-        with self._auto_flush_lock:
-            if self._auto_flush_started or self.shutdown_requested:
-                return
-
-            try:
-                self._start_auto_flush()
-                self._auto_flush_started = True
-            except Exception as e:
-                logger.error(f"Failed to start auto-flush thread: {e}", exc_info=True)
-                # Don't raise - allow spans to be buffered even if auto-flush fails
-                # They can still be flushed manually or on shutdown
-
-    def _trigger_immediate_flush(self) -> None:
-        """
-        Trigger an immediate flush in a background thread.
-        This is called when buffer thresholds are reached and no flush is in progress.
-        """
-        def flush_in_thread():
-            """Run flush in a new thread with its own event loop."""
-            try:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-                try:
-                    loop.run_until_complete(self.flush())
-                finally:
-                    if not loop.is_closed():
-                        loop.close()
-            except Exception as e:
-                logger.error(f"Error in immediate flush thread: {e}", exc_info=True)
-
-        # Start flush in background thread (daemon so it doesn't block shutdown)
-        flush_thread = threading.Thread(target=flush_in_thread, daemon=True, name="AIQA-ImmediateFlush")
-        flush_thread.start()
-
-    def _flush_worker(self) -> None:
-        """Worker function for auto-flush thread. Runs in a separate thread with its own event loop."""
-        import asyncio
-        logger.debug(f"Auto-flush worker thread started")
-
-        # Wait for startup delay before beginning flush operations
-        # This gives the container/application time to stabilize, which helps avoid startup issues (seen with AWS ECS, Dec 2025).
-        if self.startup_delay_seconds > 0:
-            logger.info(f"Auto-flush waiting {self.startup_delay_seconds}s before first flush (startup delay)")
-            # Sleep in small increments to allow for early shutdown
-            sleep_interval = 0.5
-            remaining_delay = self.startup_delay_seconds
-            while remaining_delay > 0 and not self.shutdown_requested:
-                sleep_time = min(sleep_interval, remaining_delay)
-                time.sleep(sleep_time)
-                remaining_delay -= sleep_time
-
-            if self.shutdown_requested:
-                logger.debug(f"Auto-flush startup delay interrupted by shutdown")
-                return
-
-            logger.info(f"Auto-flush startup delay complete, beginning flush operations")
-
-        # Create event loop in this thread (isolated from main thread's event loop)
-        # This prevents interference with the main application's event loop
-        try:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-        except Exception as e:
-            logger.error(f"Failed to create event loop for auto-flush thread: {e}", exc_info=True)
-            return
-
-        # Ensure event loop is always closed, even if an exception occurs
-        try:
-            cycle_count = 0
-            while not self.shutdown_requested:
-                cycle_count += 1
-                logger.debug(f"Auto-flush cycle #{cycle_count} starting")
-                try:
-                    loop.run_until_complete(self.flush())
-                    logger.debug(f"Auto-flush cycle #{cycle_count} completed, sleeping {self.flush_interval_ms / 1000.0}s")
-                except Exception as e:
-                    logger.error(f"Error in auto-flush cycle #{cycle_count}: {e}")
-                    logger.debug(f"Auto-flush cycle #{cycle_count} error handled, sleeping {self.flush_interval_ms / 1000.0}s")
-
-                # Sleep after each cycle (including errors) to avoid tight loops
-                if not self.shutdown_requested:
-                    time.sleep(self.flush_interval_ms / 1000.0)
-
-            logger.info(f"Auto-flush worker thread stopping (shutdown requested). Completed {cycle_count} cycles.")
-            # Don't do final flush here - shutdown() will handle it with synchronous send
-            # This avoids event loop shutdown issues
-            logger.debug(f"Auto-flush thread skipping final flush (will be handled by shutdown() with sync send)")
-        finally:
-            # Always close the event loop, even if an exception occurs
-            try:
-                if not loop.is_closed():
-                    loop.close()
-                    logger.debug(f"Auto-flush worker thread event loop closed")
-            except Exception:
-                pass # Ignore errors during cleanup
-
-    def _start_auto_flush(self) -> None:
-        """Start the auto-flush timer with startup delay."""
-        if self.shutdown_requested:
-            logger.warning(f"_start_auto_flush() called but shutdown already requested")
-            return
-
-        logger.info(f"Starting auto-flush thread with interval {self.flush_interval_ms / 1000.0}s, "
-            f"startup delay {self.startup_delay_seconds}s"
-        )
-
-        flush_thread = threading.Thread(target=self._flush_worker, daemon=True, name="AIQA-AutoFlush")
-        flush_thread.start()
-        self.flush_timer = flush_thread
-        logger.info(f"Auto-flush thread started: {flush_thread.name} (daemon={flush_thread.daemon})")
-
-    async def _send_spans(self, spans: List[Dict[str, Any]]) -> None:
-        """Send spans to the server API (async). Batches large payloads automatically."""
-        import aiohttp
-
-        # Split into batches if needed
-        batches = self._split_into_batches(spans)
-        if len(batches) > 1:
-            logger.info(f"_send_spans: splitting {len(spans)} spans into {len(batches)} batches")
-
-        url = self._get_span_url()
-        headers = self._build_request_headers()
-
-        if not self._api_key: # This should not happen
-            logger.error(f"_send_spans: fail - no API key provided. {len(spans)} spans lost.")
-            # Spans were already removed from buffer before calling this method. They will now get forgotten
-            return
-
-        # Use timeout to prevent hanging on unreachable servers
-        timeout = aiohttp.ClientTimeout(total=30.0, connect=10.0)
-        errors = []
-        async with aiohttp.ClientSession(timeout=timeout) as session:
-            for batch_idx, batch in enumerate(batches):
-                try:
-                    logger.debug(f"_send_spans: sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
-                    # Pre-serialize JSON to bytes and wrap in BytesIO to avoid blocking event loop
-                    json_bytes = json.dumps(batch).encode('utf-8')
-                    data = io.BytesIO(json_bytes)
-
-                    async with session.post(url, data=data, headers=headers) as response:
-                        logger.debug(f"_send_spans: batch {batch_idx + 1} received response: status={response.status}")
-                        if not response.ok:
-                            error_text = await response.text()
-                            error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status} {response.reason} - {error_text[:200]}"
-                            logger.error(f"_send_spans: {error_msg}")
-                            errors.append((batch_idx + 1, error_msg))
-                            # Continue with other batches even if one fails
-                            continue
-                        logger.debug(f"_send_spans: batch {batch_idx + 1} successfully sent {len(batch)} spans")
-                except (aiohttp.ClientError, asyncio.TimeoutError) as e:
-                    # Network errors and timeouts - log but don't fail completely
-                    error_msg = f"Network error in batch {batch_idx + 1}: {type(e).__name__}: {e}"
-                    logger.warning(f"_send_spans: {error_msg} - will retry on next flush")
-                    errors.append((batch_idx + 1, error_msg))
-                    # Continue with other batches
-                except RuntimeError as e:
-                    if self._is_interpreter_shutdown_error(e):
-                        if self.shutdown_requested:
-                            logger.debug(f"_send_spans: skipped due to interpreter shutdown: {e}")
-                        else:
-                            logger.warning(f"_send_spans: interrupted by interpreter shutdown: {e}")
-                        raise
-                    error_msg = f"RuntimeError in batch {batch_idx + 1}: {type(e).__name__}: {e}"
-                    logger.error(f"_send_spans: {error_msg}")
-                    errors.append((batch_idx + 1, error_msg))
-                    # Continue with other batches
-                except Exception as e:
-                    error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
-                    logger.error(f"_send_spans: {error_msg}")
-                    errors.append((batch_idx + 1, error_msg))
-                    # Continue with other batches
-
-        # If any batches failed, raise an exception with details
-        # Spans will be restored to buffer for retry on next flush
-        if errors:
-            error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
-            raise Exception(f"Failed to send some spans: {error_summary}")
-
-        logger.debug(f"_send_spans: successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
-
-    def _send_spans_sync(self, spans: List[Dict[str, Any]]) -> None:
-        """Send spans to the server API (synchronous, for shutdown scenarios). Batches large payloads automatically."""
-        import requests
-
-        # Split into batches if needed
-        batches = self._split_into_batches(spans)
-        if len(batches) > 1:
-            logger.info(f"_send_spans_sync() splitting {len(spans)} spans into {len(batches)} batches")
-
-        url = self._get_span_url()
-        headers = self._build_request_headers()
-
-        if not self._api_key:
-            logger.error(f"_send_spans_sync() fail - no API key provided")
-            return
-
-        errors = []
-        for batch_idx, batch in enumerate(batches):
-            try:
-                logger.debug(f"_send_spans_sync() sending batch {batch_idx + 1}/{len(batches)} with {len(batch)} spans to {url}")
-                response = requests.post(url, json=batch, headers=headers, timeout=10.0)
-                logger.debug(f"_send_spans_sync() batch {batch_idx + 1} received response: status={response.status_code}")
-                if not response.ok:
-                    error_text = response.text[:200] if response.text else ""
-                    error_msg = f"Failed to send batch {batch_idx + 1}/{len(batches)}: {response.status_code} {response.reason} - {error_text}"
-                    logger.error(f"_send_spans_sync() {error_msg}")
-                    errors.append((batch_idx + 1, error_msg))
-                    # Continue with other batches even if one fails
-                    continue
-                logger.debug(f"_send_spans_sync() batch {batch_idx + 1} successfully sent {len(batch)} spans")
-            except Exception as e:
-                error_msg = f"Exception in batch {batch_idx + 1}: {type(e).__name__}: {e}"
-                logger.error(f"_send_spans_sync() {error_msg}")
-                errors.append((batch_idx + 1, error_msg))
-                # Continue with other batches
-
-        # If any batches failed, raise an exception with details
-        if errors:
-            error_summary = "; ".join([f"batch {idx}: {msg}" for idx, msg in errors])
-            raise Exception(f"Failed to send some spans: {error_summary}")
-
-        logger.debug(f"_send_spans_sync() successfully sent all {len(spans)} spans in {len(batches)} batch(es)")
-
-    def shutdown(self) -> None:
-        """Shutdown the exporter, flushing any remaining spans. Call before process exit."""
|
|
713
|
-
logger.info(f"shutdown: called - initiating exporter shutdown")
|
|
714
|
-
self.shutdown_requested = True
|
|
715
|
-
|
|
716
|
-
# Check buffer state before shutdown
|
|
717
|
-
with self.buffer_lock:
|
|
718
|
-
buffer_size = len(self.buffer)
|
|
719
|
-
logger.info(f"shutdown: buffer contains {buffer_size} span(s) before shutdown")
|
|
720
|
-
|
|
721
|
-
# Wait for flush thread to finish (it will do final flush)
|
|
722
|
-
# Only wait if thread was actually started
|
|
723
|
-
if self._auto_flush_started and self.flush_timer and self.flush_timer.is_alive():
|
|
724
|
-
logger.info(f"shutdown: waiting for auto-flush thread to complete (timeout=10s)")
|
|
725
|
-
self.flush_timer.join(timeout=10.0)
|
|
726
|
-
if self.flush_timer.is_alive():
|
|
727
|
-
logger.warning(f"shutdown: auto-flush thread did not complete within timeout")
|
|
728
|
-
else:
|
|
729
|
-
logger.info(f"shutdown: auto-flush thread completed")
|
|
730
|
-
else:
|
|
731
|
-
logger.debug(f"shutdown: no active auto-flush thread to wait for")
|
|
732
|
-
|
|
733
|
-
# Final flush attempt (use synchronous send to avoid event loop issues)
|
|
734
|
-
with self.flush_lock:
|
|
735
|
-
logger.debug(f"shutdown: performing final flush with synchronous send")
|
|
736
|
-
# Atomically extract and remove spans to prevent race conditions
|
|
737
|
-
spans_to_flush = self._extract_and_remove_spans_from_buffer()
|
|
738
|
-
logger.debug(f"shutdown: extracted {len(spans_to_flush)} span(s) from buffer for final flush")
|
|
739
|
-
|
|
740
|
-
if spans_to_flush:
|
|
741
|
-
if not self._server_url:
|
|
742
|
-
logger.warning(f"shutdown: skipping final flush: AIQA_SERVER_URL is not set. "
|
|
743
|
-
f"{len(spans_to_flush)} span(s) will not be sent."
|
|
744
|
-
)
|
|
745
|
-
# Spans already removed from buffer, clear their keys to free memory
|
|
746
|
-
self._remove_span_keys_from_tracking(spans_to_flush)
|
|
747
|
-
else:
|
|
748
|
-
logger.info(f"shutdown: sending {len(spans_to_flush)} span(s) to server (synchronous)")
|
|
749
|
-
try:
|
|
750
|
-
self._send_spans_sync(spans_to_flush)
|
|
751
|
-
logger.info(f"shutdown: successfully sent {len(spans_to_flush)} span(s) to server")
|
|
752
|
-
# Spans already removed from buffer during extraction
|
|
753
|
-
# Clear their keys from tracking set to free memory
|
|
754
|
-
self._remove_span_keys_from_tracking(spans_to_flush)
|
|
755
|
-
except Exception as e:
|
|
756
|
-
logger.error(f"shutdown: failed to send spans: {e}")
|
|
757
|
-
# Spans already removed, but process is exiting anyway
|
|
758
|
-
logger.warning(f"shutdown: {len(spans_to_flush)} span(s) were not sent due to error")
|
|
759
|
-
# Keys will remain in tracking set, but process is exiting so memory will be freed
|
|
760
|
-
else:
|
|
761
|
-
logger.debug(f"shutdown: no spans to flush")
|
|
762
|
-
|
|
763
|
-
# Check buffer state after shutdown
|
|
764
|
-
with self.buffer_lock:
|
|
765
|
-
buffer_size = len(self.buffer)
|
|
766
|
-
if buffer_size > 0:
|
|
767
|
-
logger.warning(f"shutdown: buffer still contains {buffer_size} span(s) after shutdown")
|
|
768
|
-
else:
|
|
769
|
-
logger.info(f"shutdown: buffer is empty after shutdown")
|
|
770
|
-
|
|
771
|
-
logger.info(f"shutdown: completed")
|
|
772
|
-
|
|
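The removed _send_spans_sync above follows a simple pattern: split the spans into batches, POST each batch, record per-batch failures instead of aborting, and raise a single summary error at the end so the caller can put the spans back for a later retry. Below is a minimal standalone sketch of that pattern, not part of the package: the function name, fixed-size batching, and parameters are hypothetical (the removed exporter delegates batching to _split_into_batches and builds its own URL and headers).

from typing import Any, Dict, List, Tuple

import requests


def send_in_batches(
    url: str,
    headers: Dict[str, str],
    spans: List[Dict[str, Any]],
    batch_size: int = 100,
) -> None:
    """Send spans in batches; collect per-batch errors and raise one summary error at the end."""
    # Naive fixed-size batching purely for illustration.
    batches = [spans[i:i + batch_size] for i in range(0, len(spans), batch_size)]

    errors: List[Tuple[int, str]] = []
    for idx, batch in enumerate(batches, start=1):
        try:
            response = requests.post(url, json=batch, headers=headers, timeout=10.0)
            if not response.ok:
                errors.append((idx, f"{response.status_code} {response.reason}"))
                continue  # keep sending the remaining batches
        except requests.RequestException as e:
            errors.append((idx, f"{type(e).__name__}: {e}"))

    if errors:
        summary = "; ".join(f"batch {i}: {msg}" for i, msg in errors)
        raise RuntimeError(f"Failed to send some spans: {summary}")

Collecting errors and continuing means one failed batch does not block the rest; the single exception at the end still tells the caller that some spans need to be retried or dropped.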
@@ -1,15 +0,0 @@
-aiqa/__init__.py,sha256=8MQBrnisjeYNrwrbTheUafEWS09GtIF7ff0fBZ1Jb24,1710
-aiqa/aiqa_exporter.py,sha256=PAEwnrqTiII_OY1q6bskPob7rKGoYOYaE7ismU1pIv4,38630
-aiqa/client.py,sha256=lcENe5LlyfH8v312ElcX_HtVuOoyIMzzJnmeKrbjXYw,10063
-aiqa/constants.py,sha256=rUI3WuY1fKB_Isaok4C9vYer2XZYEgAVxAIe13pJi14,226
-aiqa/experiment_runner.py,sha256=XAZsjVP70UH_QTk5ANSOQYAhmozuGXwKB5qWWHs-zeE,11186
-aiqa/http_utils.py,sha256=m4fu3NI9CSAfdz4yz3S-nYLoAOmUhDGas4ZcpuMLog8,2241
-aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
-aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-aiqa/tracing.py,sha256=1DLiQ-HHRgVV-mLIdkzeBeWD0bLsNCV4kh1yRlurvps,44080
-aiqa/tracing_llm_utils.py,sha256=rNx6v6Wh_Mhv-_DPU9_aWS7YQcO46oiv0YPdBK1KVL8,9338
-aiqa_client-0.4.7.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
-aiqa_client-0.4.7.dist-info/METADATA,sha256=a8uGQEoyu0rmY4N2UcS7FyTC6sIyGSb8Qn_-cxJq8Yc,7705
-aiqa_client-0.4.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-aiqa_client-0.4.7.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
-aiqa_client-0.4.7.dist-info/RECORD,,
File without changes
File without changes
File without changes
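The exporter diff earlier in this file also shows the lifecycle around those send methods: a lazily started daemon flush thread that waits out a startup delay and flushes on an interval, plus a shutdown() that signals the worker, joins it with a timeout, and performs one final synchronous flush. A minimal sketch of that lifecycle follows; the class name, parameters, and stubbed flush() are hypothetical and not the package's API.

import threading
from typing import Optional


class AutoFlusher:
    """Sketch of the removed exporter's flush lifecycle: daemon worker plus join-on-shutdown."""

    def __init__(self, flush_interval_s: float = 5.0, startup_delay_s: float = 10.0) -> None:
        self.flush_interval_s = flush_interval_s
        self.startup_delay_s = startup_delay_s
        self._stop = threading.Event()
        self._thread: Optional[threading.Thread] = None

    def start(self) -> None:
        """Start the background flush thread once; no-op if already started or stopping."""
        if self._stop.is_set() or self._thread is not None:
            return
        self._thread = threading.Thread(target=self._worker, daemon=True, name="AutoFlush")
        self._thread.start()

    def _worker(self) -> None:
        # Initial delay so the application can finish starting up before the first flush.
        if self._stop.wait(self.startup_delay_s):
            return
        while not self._stop.wait(self.flush_interval_s):
            self.flush()

    def flush(self) -> None:
        pass  # stub: a real exporter would drain its span buffer and POST it to the server

    def shutdown(self) -> None:
        """Signal the worker, wait briefly for it to finish, then do one final synchronous flush."""
        self._stop.set()
        if self._thread is not None and self._thread.is_alive():
            self._thread.join(timeout=10.0)
        self.flush()

Because the worker is a daemon thread it will not keep the process alive on its own, which is why the explicit shutdown() and final synchronous flush matter for not losing buffered spans at process exit.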