aiqa-client 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/__init__.py +1 -1
- aiqa/client.py +108 -23
- aiqa/constants.py +3 -1
- aiqa/experiment_runner.py +12 -29
- aiqa/http_utils.py +143 -0
- aiqa/object_serialiser.py +136 -115
- aiqa/tracing.py +155 -267
- aiqa/tracing_llm_utils.py +191 -0
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.5.2.dist-info}/METADATA +1 -1
- aiqa_client-0.5.2.dist-info/RECORD +14 -0
- aiqa/aiqa_exporter.py +0 -679
- aiqa/test_experiment_runner.py +0 -176
- aiqa/test_startup_reliability.py +0 -249
- aiqa/test_tracing.py +0 -230
- aiqa_client-0.4.3.dist-info/RECORD +0 -16
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.5.2.dist-info}/WHEEL +0 -0
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.5.2.dist-info}/licenses/LICENSE.txt +0 -0
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.5.2.dist-info}/top_level.txt +0 -0
aiqa/object_serialiser.py
CHANGED
|
@@ -7,10 +7,36 @@ import json
|
|
|
7
7
|
import os
|
|
8
8
|
import dataclasses
|
|
9
9
|
import logging
|
|
10
|
+
from .constants import LOG_TAG
|
|
10
11
|
from datetime import datetime, date, time
|
|
11
12
|
from typing import Any, Callable, Set
|
|
13
|
+
from json.encoder import JSONEncoder
|
|
12
14
|
|
|
13
|
-
logger = logging.getLogger(
|
|
15
|
+
logger = logging.getLogger(LOG_TAG)
|
|
16
|
+
|
|
17
|
+
def sanitize_string_for_utf8(text: str) -> str:
    """
    Make a string safe to encode as UTF-8 by replacing surrogate code points.

    Surrogate code points (U+D800 to U+DFFF) cannot be encoded to UTF-8 and
    make ``str.encode('utf-8')`` raise ``UnicodeEncodeError``.

    Args:
        text: The string to sanitize. ``None`` is passed through unchanged and
            any other non-string value is converted with ``str()`` first.

    Returns:
        The input unchanged when it already encodes cleanly; otherwise a copy
        in which every surrogate code point is replaced by the Unicode
        replacement character (U+FFFD). ``None`` when ``text`` is ``None``.
    """
    if text is None:
        return None
    if not isinstance(text, str):  # paranoia
        text = str(text)
    try:
        # Fast path: a clean string encodes without error and is returned as-is.
        text.encode('utf-8')
        return text
    except UnicodeEncodeError:
        # Surrogates are the only code points the UTF-8 codec rejects.
        # Note: str.encode(..., errors='replace') would substitute '?' rather
        # than U+FFFD, so the replacement is done per code point instead.
        return ''.join(
            '\ufffd' if '\ud800' <= ch <= '\udfff' else ch
            for ch in text
        )
|
|
14
40
|
|
|
15
41
|
def toNumber(value: str|int|None) -> int:
|
|
16
42
|
"""Convert string to number. handling units like g, m, k, (also mb kb gb though these should be avoided)"""
|
|
@@ -105,7 +131,7 @@ def serialize_for_span(value: Any) -> Any:
|
|
|
105
131
|
"""
|
|
106
132
|
Serialize a value for span attributes.
|
|
107
133
|
OpenTelemetry only accepts primitives (bool, str, bytes, int, float) or sequences of those.
|
|
108
|
-
Complex types (dicts,
|
|
134
|
+
Complex types (dicts, objects) are converted to JSON strings.
|
|
109
135
|
|
|
110
136
|
Handles objects by attempting to convert them to dicts, with safeguards against:
|
|
111
137
|
- Circular references
|
|
@@ -118,14 +144,17 @@ def serialize_for_span(value: Any) -> Any:
|
|
|
118
144
|
|
|
119
145
|
# For sequences, check if all elements are primitives
|
|
120
146
|
if isinstance(value, (list, tuple)):
|
|
121
|
-
#
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
147
|
+
# Use short-circuiting loop instead of all() for better performance on large lists
|
|
148
|
+
# Only iterate until we find a non-primitive
|
|
149
|
+
for item in value:
|
|
150
|
+
if not isinstance(item, (str, int, float, bool, bytes, type(None))):
|
|
151
|
+
# Found non-primitive, serialize to JSON string
|
|
152
|
+
try:
|
|
153
|
+
return safe_json_dumps(value)
|
|
154
|
+
except Exception:
|
|
155
|
+
return str(value)
|
|
156
|
+
# All elements are primitives, return as list
|
|
157
|
+
return list(value)
|
|
129
158
|
|
|
130
159
|
# For dicts and other complex types, serialize to JSON string
|
|
131
160
|
try:
|
|
@@ -140,10 +169,13 @@ def safe_str_repr(value: Any) -> str:
|
|
|
140
169
|
Safely convert a value to string representation.
|
|
141
170
|
Handles objects with __repr__ that might raise exceptions.
|
|
142
171
|
Uses AIQA_MAX_OBJECT_STR_CHARS environment variable (default: 100000) to limit length.
|
|
172
|
+
Also sanitizes surrogate characters to prevent UTF-8 encoding errors.
|
|
143
173
|
"""
|
|
144
174
|
try:
|
|
145
175
|
# Try __repr__ first (usually more informative)
|
|
146
176
|
repr_str = repr(value)
|
|
177
|
+
# Sanitize surrogate characters that can't be encoded to UTF-8
|
|
178
|
+
repr_str = sanitize_string_for_utf8(repr_str)
|
|
147
179
|
# Limit length to avoid huge strings
|
|
148
180
|
if len(repr_str) > AIQA_MAX_OBJECT_STR_CHARS:
|
|
149
181
|
return repr_str[:AIQA_MAX_OBJECT_STR_CHARS] + "... (truncated)"
|
|
@@ -158,7 +190,7 @@ def safe_str_repr(value: Any) -> str:
|
|
|
158
190
|
|
|
159
191
|
def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_depth: int = 0) -> Any:
|
|
160
192
|
"""
|
|
161
|
-
Convert an object to a dictionary representation.
|
|
193
|
+
Convert an object to a dictionary representation. Applies data filters to the object.
|
|
162
194
|
|
|
163
195
|
Args:
|
|
164
196
|
obj: The object to convert
|
|
@@ -172,7 +204,7 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
|
|
|
172
204
|
if current_depth > max_depth:
|
|
173
205
|
return "<max depth exceeded>"
|
|
174
206
|
|
|
175
|
-
obj_id = id(obj)
|
|
207
|
+
obj_id = id(obj) # note: id cannot raise exception
|
|
176
208
|
if obj_id in visited:
|
|
177
209
|
return "<circular reference>"
|
|
178
210
|
|
|
@@ -185,53 +217,42 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
|
|
|
185
217
|
return obj
|
|
186
218
|
|
|
187
219
|
# Handle datetime objects
|
|
188
|
-
if isinstance(obj, datetime):
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
return obj.isoformat()
|
|
220
|
+
if isinstance(obj, datetime) or isinstance(obj, date) or isinstance(obj, time):
|
|
221
|
+
try:
|
|
222
|
+
return obj.isoformat()
|
|
223
|
+
except Exception: # paranoia if isoformat() fails (e.g., invalid datetime state, custom implementation bug)
|
|
224
|
+
return safe_str_repr(obj)
|
|
194
225
|
|
|
195
226
|
# Handle dict
|
|
196
227
|
if isinstance(obj, dict):
|
|
197
228
|
visited.add(obj_id)
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
return result
|
|
212
|
-
except Exception as e:
|
|
213
|
-
visited.discard(obj_id)
|
|
214
|
-
logger.debug(f"Failed to convert dict to dict: {e}")
|
|
215
|
-
return safe_str_repr(obj)
|
|
229
|
+
result = {}
|
|
230
|
+
for k, v in obj.items():
|
|
231
|
+
try:
|
|
232
|
+
key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
|
|
233
|
+
filtered_value = _apply_data_filters(key_str, v)
|
|
234
|
+
result[key_str] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
|
|
235
|
+
except Exception as e:
|
|
236
|
+
# If one key-value pair fails, log and use string representation for the value
|
|
237
|
+
key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
|
|
238
|
+
logger.debug(f"Failed to convert dict value for key '{key_str}': {e}")
|
|
239
|
+
result[key_str] = safe_str_repr(v)
|
|
240
|
+
visited.remove(obj_id)
|
|
241
|
+
return result
|
|
216
242
|
|
|
217
243
|
# Handle list/tuple
|
|
218
244
|
if isinstance(obj, (list, tuple)):
|
|
219
245
|
visited.add(obj_id)
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
return result
|
|
231
|
-
except Exception as e:
|
|
232
|
-
visited.discard(obj_id)
|
|
233
|
-
logger.debug(f"Failed to convert list/tuple to dict: {e}")
|
|
234
|
-
return safe_str_repr(obj)
|
|
246
|
+
result = []
|
|
247
|
+
for item in obj:
|
|
248
|
+
try:
|
|
249
|
+
result.append(object_to_dict(item, visited, max_depth, current_depth + 1))
|
|
250
|
+
except Exception as e:
|
|
251
|
+
# If one item fails, log and use its string representation
|
|
252
|
+
logger.debug(f"Failed to convert list item {type(item).__name__} to dict: {e}")
|
|
253
|
+
result.append(safe_str_repr(item))
|
|
254
|
+
visited.remove(obj_id)
|
|
255
|
+
return result
|
|
235
256
|
|
|
236
257
|
# Handle dataclasses
|
|
237
258
|
if dataclasses.is_dataclass(obj):
|
|
@@ -258,18 +279,11 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
|
|
|
258
279
|
if hasattr(obj, "__dict__"):
|
|
259
280
|
visited.add(obj_id)
|
|
260
281
|
try:
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
if not (isinstance(key, str) and key.startswith("__")):
|
|
265
|
-
filtered_value = _apply_data_filters(key, value)
|
|
266
|
-
result[key] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
|
|
267
|
-
visited.remove(obj_id)
|
|
268
|
-
return result
|
|
269
|
-
except Exception as e:
|
|
282
|
+
obj_dict = obj.__dict__
|
|
283
|
+
return object_to_dict(obj_dict, visited, max_depth, current_depth) # Note: Don't count using __dict__ as a recursion depth +1 step
|
|
284
|
+
except Exception as e: # paranoia: object_to_dict should never raise an exception
|
|
270
285
|
visited.discard(obj_id)
|
|
271
|
-
|
|
272
|
-
logger.debug(f"Failed to convert object {type(obj).__name__} to dict: {e}")
|
|
286
|
+
logger.debug(f"Failed to convert object {type(obj).__name__} with __dict__ to dict: {e}")
|
|
273
287
|
return safe_str_repr(obj)
|
|
274
288
|
|
|
275
289
|
# Handle objects with __slots__
|
|
@@ -311,6 +325,36 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
|
|
|
311
325
|
return safe_str_repr(obj)
|
|
312
326
|
|
|
313
327
|
|
|
328
|
+
class SizeLimitedJSONEncoder(JSONEncoder):
    """
    JSON encoder that caps the number of characters it emits.

    ``iterencode`` counts the characters produced by the parent encoder and
    stops once ``max_size_chars`` would be exceeded. The output emitted up to
    that point is a prefix of the full encoding (at most ``max_size_chars``
    characters long) and is generally NOT valid JSON; check ``truncated``
    after iterating to find out whether the limit was hit.

    Attributes are documented here rather than inline:
        max_size_chars: Maximum number of characters to emit.
        current_length: Characters received from the parent encoder so far,
            including the chunk that crossed the limit.
    """

    def __init__(self, max_size_chars: int, *args, **kwargs):
        """
        Args:
            max_size_chars: Maximum number of characters to emit.
            *args, **kwargs: Passed through unchanged to ``JSONEncoder``.
        """
        super().__init__(*args, **kwargs)
        self.max_size_chars = max_size_chars
        self.current_length = 0
        self._truncated = False

    @property
    def truncated(self) -> bool:
        """Whether the most recent ``iterencode`` run hit the size limit."""
        return self._truncated

    def iterencode(self, o, _one_shot=False):
        """
        Encode ``o`` chunk by chunk, stopping once the size limit is reached.

        Args:
            o: The object to encode.
            _one_shot: Forwarded to ``JSONEncoder.iterencode``. When true the
                parent may return the whole document as one large chunk.

        Yields:
            String chunks whose combined length never exceeds
            ``max_size_chars``.
        """
        # State is per-run; this is a generator, so the reset happens when
        # iteration starts rather than when iterencode() is called.
        self.current_length = 0
        self._truncated = False

        for chunk in super().iterencode(o, _one_shot):
            already_emitted = self.current_length
            self.current_length += len(chunk)
            if self.current_length > self.max_size_chars:
                self._truncated = True
                # Emit the part of this chunk that still fits. Dropping the
                # whole chunk would leave consumers with an empty prefix when
                # the parent produced the document as a single chunk.
                remaining = self.max_size_chars - already_emitted
                if remaining > 0:
                    yield chunk[:remaining]
                break
            yield chunk
|
|
356
|
+
|
|
357
|
+
|
|
314
358
|
def safe_json_dumps(value: Any) -> str:
|
|
315
359
|
"""
|
|
316
360
|
Safely serialize a value to JSON string with safeguards against:
|
|
@@ -329,68 +373,45 @@ def safe_json_dumps(value: Any) -> str:
|
|
|
329
373
|
max_size_chars = AIQA_MAX_OBJECT_STR_CHARS
|
|
330
374
|
visited: Set[int] = set()
|
|
331
375
|
|
|
332
|
-
# Convert the entire structure to ensure circular references are detected
|
|
376
|
+
# Convert the entire structure to JSON-friendly form, and ensure circular references are detected
|
|
333
377
|
# across the whole object graph
|
|
334
378
|
try:
|
|
335
379
|
converted = object_to_dict(value, visited)
|
|
336
380
|
except Exception as e:
|
|
337
|
-
#
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
return json_str
|
|
344
|
-
except Exception as e2:
|
|
345
|
-
logger.debug(f"json.dumps with default handler also failed for {type(value).__name__}: {e2}")
|
|
346
|
-
return safe_str_repr(value)
|
|
381
|
+
# Note: object_to_dict is very defensive but can still raise in rare edge cases:
|
|
382
|
+
# - Objects with corrupted type metadata causing isinstance()/hasattr() to fail
|
|
383
|
+
# - Malformed dataclasses causing dataclasses.fields() to raise
|
|
384
|
+
# - Objects where accessing __dict__ or __slots__ triggers descriptors that raise
|
|
385
|
+
logger.debug(f"object_to_dict failed for {type(value).__name__}, using safe_str_repr. Error: {e}")
|
|
386
|
+
return safe_str_repr(value)
|
|
347
387
|
|
|
348
|
-
# Try JSON serialization of the converted structure
|
|
388
|
+
# Try JSON serialization of the converted structure with size-limited encoder
|
|
389
|
+
# After object_to_dict(), converted is a plain dict/list with circular refs already
|
|
390
|
+
# converted to "<circular reference>" strings. We use check_circular=True (default)
|
|
391
|
+
# as an additional safety net, though it's redundant since object_to_dict() already
|
|
392
|
+
# handled circular refs. We don't need a default handler here since converted
|
|
393
|
+
# should be JSON-serializable.
|
|
349
394
|
try:
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
395
|
+
encoder = SizeLimitedJSONEncoder(
|
|
396
|
+
max_size_chars=max_size_chars,
|
|
397
|
+
check_circular=True, # Safety net for dict/list circular refs (redundant but harmless)
|
|
398
|
+
ensure_ascii=False
|
|
399
|
+
)
|
|
400
|
+
# Use iterencode to get chunks and check size incrementally
|
|
401
|
+
chunks = []
|
|
402
|
+
for chunk in encoder.iterencode(converted, _one_shot=True):
|
|
403
|
+
chunks.append(chunk)
|
|
404
|
+
if encoder._truncated:
|
|
405
|
+
# Hit the limit, stop early
|
|
406
|
+
json_str = ''.join(chunks)
|
|
407
|
+
return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
|
|
408
|
+
json_str = ''.join(chunks)
|
|
409
|
+
# Check if truncation occurred (encoder may have stopped after last chunk)
|
|
410
|
+
if encoder._truncated or len(json_str) > max_size_chars:
|
|
353
411
|
return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
|
|
354
412
|
return json_str
|
|
355
413
|
except Exception as e:
|
|
356
|
-
logger.debug(f"json.dumps total fail for {type(value).__name__}: {
|
|
414
|
+
logger.debug(f"json.dumps total fail for {type(value).__name__}: {e}")
|
|
357
415
|
# Final fallback
|
|
358
416
|
return safe_str_repr(value)
|
|
359
417
|
|
|
360
|
-
|
|
361
|
-
def json_default_handler_factory(visited: Set[int]) -> Callable[[Any], Any]:
|
|
362
|
-
"""
|
|
363
|
-
Create a JSON default handler with a shared visited set for circular reference detection.
|
|
364
|
-
"""
|
|
365
|
-
def handler(obj: Any) -> Any:
|
|
366
|
-
# Handle datetime objects
|
|
367
|
-
if isinstance(obj, datetime):
|
|
368
|
-
return obj.isoformat()
|
|
369
|
-
if isinstance(obj, date):
|
|
370
|
-
return obj.isoformat()
|
|
371
|
-
if isinstance(obj, time):
|
|
372
|
-
return obj.isoformat()
|
|
373
|
-
|
|
374
|
-
# Handle bytes
|
|
375
|
-
if isinstance(obj, bytes):
|
|
376
|
-
try:
|
|
377
|
-
return obj.decode('utf-8')
|
|
378
|
-
except UnicodeDecodeError:
|
|
379
|
-
return f"<bytes: {len(obj)} bytes>"
|
|
380
|
-
|
|
381
|
-
# Try object conversion with the shared visited set
|
|
382
|
-
try:
|
|
383
|
-
return object_to_dict(obj, visited)
|
|
384
|
-
except Exception:
|
|
385
|
-
return safe_str_repr(obj)
|
|
386
|
-
|
|
387
|
-
return handler
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
def json_default_handler(obj: Any) -> Any:
|
|
391
|
-
"""
|
|
392
|
-
Default handler for JSON serialization of non-serializable objects.
|
|
393
|
-
This is a fallback that creates its own visited set.
|
|
394
|
-
"""
|
|
395
|
-
return json_default_handler_factory(set())(obj)
|
|
396
|
-
|