aiqa-client 0.4.3__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/object_serialiser.py CHANGED
@@ -7,10 +7,36 @@ import json
7
7
  import os
8
8
  import dataclasses
9
9
  import logging
10
+ from .constants import LOG_TAG
10
11
  from datetime import datetime, date, time
11
12
  from typing import Any, Callable, Set
13
+ from json.encoder import JSONEncoder
12
14
 
13
- logger = logging.getLogger("aiqa")
15
+ logger = logging.getLogger(LOG_TAG)
16
+
17
+ def sanitize_string_for_utf8(text: str) -> str:
18
+ """
19
+ Sanitize a string to remove surrogate characters that can't be encoded to UTF-8.
20
+ Surrogate characters (U+D800 to U+DFFF) are invalid in UTF-8 and can cause encoding errors.
21
+
22
+ Args:
23
+ text: The string to sanitize
24
+
25
+ Returns:
26
+ A string with surrogate characters replaced by the Unicode replacement character (U+FFFD)
27
+ """
28
+ if text == None:
29
+ return None
30
+ if not isinstance(text, str): # paranoia
31
+ text = str(text)
32
+ try:
33
+ # Try encoding to UTF-8 to check if there are any issues
34
+ text.encode('utf-8')
35
+ return text
36
+ except UnicodeEncodeError:
37
+ # If encoding fails, replace surrogates with replacement character
38
+ # This handles surrogates that can't be encoded
39
+ return text.encode('utf-8', errors='replace').decode('utf-8', errors='replace')
14
40
 
15
41
  def toNumber(value: str|int|None) -> int:
16
42
  """Convert string to number. handling units like g, m, k, (also mb kb gb though these should be avoided)"""
@@ -105,7 +131,7 @@ def serialize_for_span(value: Any) -> Any:
105
131
  """
106
132
  Serialize a value for span attributes.
107
133
  OpenTelemetry only accepts primitives (bool, str, bytes, int, float) or sequences of those.
108
- Complex types (dicts, lists, objects) are converted to JSON strings.
134
+ Complex types (dicts, objects) are converted to JSON strings.
109
135
 
110
136
  Handles objects by attempting to convert them to dicts, with safeguards against:
111
137
  - Circular references
@@ -118,14 +144,17 @@ def serialize_for_span(value: Any) -> Any:
118
144
 
119
145
  # For sequences, check if all elements are primitives
120
146
  if isinstance(value, (list, tuple)):
121
- # If all elements are primitives, return as list
122
- if all(isinstance(item, (str, int, float, bool, bytes, type(None))) for item in value):
123
- return list(value)
124
- # Otherwise serialize to JSON string
125
- try:
126
- return safe_json_dumps(value)
127
- except Exception:
128
- return str(value)
147
+ # Scan with an explicit loop that exits at the first non-primitive (all() short-circuits as well; the loop lets us serialize right at that point)
148
+ # Only iterate until we find a non-primitive
149
+ for item in value:
150
+ if not isinstance(item, (str, int, float, bool, bytes, type(None))):
151
+ # Found non-primitive, serialize to JSON string
152
+ try:
153
+ return safe_json_dumps(value)
154
+ except Exception:
155
+ return str(value)
156
+ # All elements are primitives, return as list
157
+ return list(value)
129
158
 
130
159
  # For dicts and other complex types, serialize to JSON string
131
160
  try:
@@ -140,10 +169,13 @@ def safe_str_repr(value: Any) -> str:
140
169
  Safely convert a value to string representation.
141
170
  Handles objects with __repr__ that might raise exceptions.
142
171
  Uses AIQA_MAX_OBJECT_STR_CHARS environment variable (default: 100000) to limit length.
172
+ Also sanitizes surrogate characters to prevent UTF-8 encoding errors.
143
173
  """
144
174
  try:
145
175
  # Try __repr__ first (usually more informative)
146
176
  repr_str = repr(value)
177
+ # Sanitize surrogate characters that can't be encoded to UTF-8
178
+ repr_str = sanitize_string_for_utf8(repr_str)
147
179
  # Limit length to avoid huge strings
148
180
  if len(repr_str) > AIQA_MAX_OBJECT_STR_CHARS:
149
181
  return repr_str[:AIQA_MAX_OBJECT_STR_CHARS] + "... (truncated)"
@@ -158,7 +190,7 @@ def safe_str_repr(value: Any) -> str:
158
190
 
159
191
  def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_depth: int = 0) -> Any:
160
192
  """
161
- Convert an object to a dictionary representation.
193
+ Convert an object to a dictionary representation. Applies data filters to the object.
162
194
 
163
195
  Args:
164
196
  obj: The object to convert
@@ -172,7 +204,7 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
172
204
  if current_depth > max_depth:
173
205
  return "<max depth exceeded>"
174
206
 
175
- obj_id = id(obj)
207
+ obj_id = id(obj) # note: id cannot raise exception
176
208
  if obj_id in visited:
177
209
  return "<circular reference>"
178
210
 
@@ -185,53 +217,42 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
185
217
  return obj
186
218
 
187
219
  # Handle datetime objects
188
- if isinstance(obj, datetime):
189
- return obj.isoformat()
190
- if isinstance(obj, date):
191
- return obj.isoformat()
192
- if isinstance(obj, time):
193
- return obj.isoformat()
220
+ if isinstance(obj, datetime) or isinstance(obj, date) or isinstance(obj, time):
221
+ try:
222
+ return obj.isoformat()
223
+ except Exception: # paranoia if isoformat() fails (e.g., invalid datetime state, custom implementation bug)
224
+ return safe_str_repr(obj)
194
225
 
195
226
  # Handle dict
196
227
  if isinstance(obj, dict):
197
228
  visited.add(obj_id)
198
- try:
199
- result = {}
200
- for k, v in obj.items():
201
- try:
202
- key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
203
- filtered_value = _apply_data_filters(key_str, v)
204
- result[key_str] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
205
- except Exception as e:
206
- # If one key-value pair fails, log and use string representation for the value
207
- key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
208
- logger.debug(f"Failed to convert dict value for key '{key_str}': {e}")
209
- result[key_str] = safe_str_repr(v)
210
- visited.remove(obj_id)
211
- return result
212
- except Exception as e:
213
- visited.discard(obj_id)
214
- logger.debug(f"Failed to convert dict to dict: {e}")
215
- return safe_str_repr(obj)
229
+ result = {}
230
+ for k, v in obj.items():
231
+ try:
232
+ key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
233
+ filtered_value = _apply_data_filters(key_str, v)
234
+ result[key_str] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
235
+ except Exception as e:
236
+ # If one key-value pair fails, log and use string representation for the value
237
+ key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
238
+ logger.debug(f"Failed to convert dict value for key '{key_str}': {e}")
239
+ result[key_str] = safe_str_repr(v)
240
+ visited.remove(obj_id)
241
+ return result
216
242
 
217
243
  # Handle list/tuple
218
244
  if isinstance(obj, (list, tuple)):
219
245
  visited.add(obj_id)
220
- try:
221
- result = []
222
- for item in obj:
223
- try:
224
- result.append(object_to_dict(item, visited, max_depth, current_depth + 1))
225
- except Exception as e:
226
- # If one item fails, log and use its string representation
227
- logger.debug(f"Failed to convert list item {type(item).__name__} to dict: {e}")
228
- result.append(safe_str_repr(item))
229
- visited.remove(obj_id)
230
- return result
231
- except Exception as e:
232
- visited.discard(obj_id)
233
- logger.debug(f"Failed to convert list/tuple to dict: {e}")
234
- return safe_str_repr(obj)
246
+ result = []
247
+ for item in obj:
248
+ try:
249
+ result.append(object_to_dict(item, visited, max_depth, current_depth + 1))
250
+ except Exception as e:
251
+ # If one item fails, log and use its string representation
252
+ logger.debug(f"Failed to convert list item {type(item).__name__} to dict: {e}")
253
+ result.append(safe_str_repr(item))
254
+ visited.remove(obj_id)
255
+ return result
235
256
 
236
257
  # Handle dataclasses
237
258
  if dataclasses.is_dataclass(obj):
@@ -258,18 +279,11 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
258
279
  if hasattr(obj, "__dict__"):
259
280
  visited.add(obj_id)
260
281
  try:
261
- result = {}
262
- for key, value in obj.__dict__.items():
263
- # Skip private attributes that start with __
264
- if not (isinstance(key, str) and key.startswith("__")):
265
- filtered_value = _apply_data_filters(key, value)
266
- result[key] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
267
- visited.remove(obj_id)
268
- return result
269
- except Exception as e:
282
+ obj_dict = obj.__dict__
283
+ return object_to_dict(obj_dict, visited, max_depth, current_depth) # Note: Don't count using __dict__ as a recursion depth +1 step
284
+ except Exception as e: # paranoia: object_to_dict should never raise an exception
270
285
  visited.discard(obj_id)
271
- # Log the error for debugging, but still return string representation
272
- logger.debug(f"Failed to convert object {type(obj).__name__} to dict: {e}")
286
+ logger.debug(f"Failed to convert object {type(obj).__name__} with __dict__ to dict: {e}")
273
287
  return safe_str_repr(obj)
274
288
 
275
289
  # Handle objects with __slots__
@@ -311,6 +325,36 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
311
325
  return safe_str_repr(obj)
312
326
 
313
327
 
328
+ class SizeLimitedJSONEncoder(JSONEncoder):
329
+ """
330
+ Custom JSON encoder that stops serialization early when max_size_chars is reached.
331
+ Tracks output length incrementally and stops yielding chunks when limit is exceeded.
332
+ """
333
+ def __init__(self, max_size_chars: int, *args, **kwargs):
334
+ super().__init__(*args, **kwargs)
335
+ self.max_size_chars = max_size_chars
336
+ self.current_length = 0
337
+ self._truncated = False
338
+
339
+ def iterencode(self, o, _one_shot=False):
340
+ """
341
+ Encode the object incrementally, checking size after each chunk.
342
+ Stops early if max_size_chars is exceeded.
343
+ """
344
+ self.current_length = 0
345
+ self._truncated = False
346
+
347
+ # Use _one_shot optimization when possible (faster for simple objects)
348
+ # The parent class will determine if _one_shot is safe
349
+ for chunk in super().iterencode(o, _one_shot):
350
+ self.current_length += len(chunk)
351
+ if self.current_length > self.max_size_chars:
352
+ self._truncated = True
353
+ # Stop yielding chunks when limit is exceeded
354
+ break
355
+ yield chunk
356
+
357
+
314
358
  def safe_json_dumps(value: Any) -> str:
315
359
  """
316
360
  Safely serialize a value to JSON string with safeguards against:
@@ -329,68 +373,45 @@ def safe_json_dumps(value: Any) -> str:
329
373
  max_size_chars = AIQA_MAX_OBJECT_STR_CHARS
330
374
  visited: Set[int] = set()
331
375
 
332
- # Convert the entire structure to ensure circular references are detected
376
+ # Convert the entire structure to JSON-friendly form, and ensure circular references are detected
333
377
  # across the whole object graph
334
378
  try:
335
379
  converted = object_to_dict(value, visited)
336
380
  except Exception as e:
337
- # If conversion fails, try with a fresh visited set and json default handler
338
- logger.debug(f"object_to_dict failed for {type(value).__name__}, trying json.dumps with default handler: {e}")
339
- try:
340
- json_str = json.dumps(value, default=json_default_handler_factory(set()))
341
- if len(json_str) > max_size_chars:
342
- return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}... conversion error: {e}>"
343
- return json_str
344
- except Exception as e2:
345
- logger.debug(f"json.dumps with default handler also failed for {type(value).__name__}: {e2}")
346
- return safe_str_repr(value)
381
+ # Note: object_to_dict is very defensive but can still raise in rare edge cases:
382
+ # - Objects with corrupted type metadata causing isinstance()/hasattr() to fail
383
+ # - Malformed dataclasses causing dataclasses.fields() to raise
384
+ # - Objects where accessing __dict__ or __slots__ triggers descriptors that raise
385
+ logger.debug(f"object_to_dict failed for {type(value).__name__}, using safe_str_repr. Error: {e}")
386
+ return safe_str_repr(value)
347
387
 
348
- # Try JSON serialization of the converted structure
388
+ # Try JSON serialization of the converted structure with size-limited encoder
389
+ # After object_to_dict(), converted is a plain dict/list with circular refs already
390
+ # converted to "<circular reference>" strings. We use check_circular=True (default)
391
+ # as an additional safety net, though it's redundant since object_to_dict() already
392
+ # handled circular refs. We don't need a default handler here since converted
393
+ # should be JSON-serializable.
349
394
  try:
350
- json_str = json.dumps(converted, default=json_default_handler_factory(set()))
351
- # Check size
352
- if len(json_str) > max_size_chars:
395
+ encoder = SizeLimitedJSONEncoder(
396
+ max_size_chars=max_size_chars,
397
+ check_circular=True, # Safety net for dict/list circular refs (redundant but harmless)
398
+ ensure_ascii=False
399
+ )
400
+ # Use iterencode to get chunks and check size incrementally
401
+ chunks = []
402
+ for chunk in encoder.iterencode(converted, _one_shot=True):
403
+ chunks.append(chunk)
404
+ if encoder._truncated:
405
+ # Hit the limit, stop early
406
+ json_str = ''.join(chunks)
407
+ return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
408
+ json_str = ''.join(chunks)
409
+ # Check if truncation occurred (encoder may have stopped after last chunk)
410
+ if encoder._truncated or len(json_str) > max_size_chars:
353
411
  return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
354
412
  return json_str
355
413
  except Exception as e:
356
- logger.debug(f"json.dumps total fail for {type(value).__name__}: {e2}")
414
+ logger.debug(f"json.dumps total fail for {type(value).__name__}: {e}")
357
415
  # Final fallback
358
416
  return safe_str_repr(value)
359
417
 
360
-
361
- def json_default_handler_factory(visited: Set[int]) -> Callable[[Any], Any]:
362
- """
363
- Create a JSON default handler with a shared visited set for circular reference detection.
364
- """
365
- def handler(obj: Any) -> Any:
366
- # Handle datetime objects
367
- if isinstance(obj, datetime):
368
- return obj.isoformat()
369
- if isinstance(obj, date):
370
- return obj.isoformat()
371
- if isinstance(obj, time):
372
- return obj.isoformat()
373
-
374
- # Handle bytes
375
- if isinstance(obj, bytes):
376
- try:
377
- return obj.decode('utf-8')
378
- except UnicodeDecodeError:
379
- return f"<bytes: {len(obj)} bytes>"
380
-
381
- # Try object conversion with the shared visited set
382
- try:
383
- return object_to_dict(obj, visited)
384
- except Exception:
385
- return safe_str_repr(obj)
386
-
387
- return handler
388
-
389
-
390
- def json_default_handler(obj: Any) -> Any:
391
- """
392
- Default handler for JSON serialization of non-serializable objects.
393
- This is a fallback that creates its own visited set.
394
- """
395
- return json_default_handler_factory(set())(obj)
396
-