aiqa-client 0.4.1__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/client.py CHANGED
@@ -7,7 +7,9 @@ from opentelemetry import trace
7
7
  from opentelemetry.sdk.trace import TracerProvider
8
8
  from opentelemetry.sdk.trace.export import BatchSpanProcessor
9
9
 
10
- logger = logging.getLogger("AIQA")
10
+ from .constants import AIQA_TRACER_NAME, LOG_TAG
11
+
12
+ logger = logging.getLogger(LOG_TAG)
11
13
 
12
14
  # Compatibility import for TraceIdRatioBased sampler
13
15
  # In older OpenTelemetry versions it was TraceIdRatioBasedSampler
@@ -20,7 +22,7 @@ except ImportError:
20
22
  from opentelemetry.sdk.trace.sampling import TraceIdRatioBasedSampler as TraceIdRatioBased
21
23
  except ImportError:
22
24
  logger.warning(
23
- "Could not import TraceIdRatioBased or TraceIdRatioBasedSampler from "
25
+ f"Could not import TraceIdRatioBased or TraceIdRatioBasedSampler from "
24
26
  "opentelemetry.sdk.trace.sampling. AIQA tracing may not work correctly. "
25
27
  "Please ensure opentelemetry-sdk>=1.24.0 is installed. "
26
28
  "Try: pip install --upgrade opentelemetry-sdk"
@@ -28,7 +30,7 @@ except ImportError:
28
30
  # Set to None so we can check later
29
31
  TraceIdRatioBased = None
30
32
 
31
- from .constants import AIQA_TRACER_NAME
33
+ from .http_utils import get_server_url, get_api_key
32
34
 
33
35
  class AIQAClient:
34
36
  """
@@ -93,7 +95,7 @@ class AIQAClient:
93
95
  This will also set enabled=False to prevent further tracing attempts.
94
96
  """
95
97
  try:
96
- logger.info("AIQA tracing shutting down")
98
+ logger.info(f"AIQA tracing shutting down")
97
99
  # Disable tracing to prevent attempts to use shut-down system
98
100
  self.enabled = False
99
101
  if self._provider:
@@ -118,7 +120,7 @@ def get_component_tag() -> str:
118
120
  return _component_tag
119
121
 
120
122
 
121
- def set_component_tag(tag: str | None) -> None:
123
+ def set_component_tag(tag: Optional[str]) -> None:
122
124
  """Set the component tag programmatically (overrides environment variable)."""
123
125
  global _component_tag
124
126
  _component_tag = tag or ""
@@ -150,7 +152,7 @@ def get_aiqa_client() -> AIQAClient:
150
152
  # Optional: Initialize explicitly (usually not needed)
151
153
  client = get_aiqa_client()
152
154
  if client.enabled:
153
- print("Tracing is enabled")
155
+ print(f"Tracing is enabled")
154
156
 
155
157
  @WithTracing
156
158
  def my_function():
@@ -161,7 +163,7 @@ def get_aiqa_client() -> AIQAClient:
161
163
  _init_tracing()
162
164
  except Exception as e:
163
165
  logger.error(f"Failed to initialize AIQA tracing: {e}")
164
- logger.warning("AIQA tracing is disabled. Your application will continue to run without tracing.")
166
+ logger.warning(f"AIQA tracing is disabled. Your application will continue to run without tracing.")
165
167
  return client
166
168
 
167
169
  def _init_tracing() -> None:
@@ -171,8 +173,8 @@ def _init_tracing() -> None:
171
173
  return
172
174
 
173
175
  try:
174
- server_url = os.getenv("AIQA_SERVER_URL")
175
- api_key = os.getenv("AIQA_API_KEY")
176
+ server_url = get_server_url()
177
+ api_key = get_api_key()
176
178
 
177
179
  if not server_url or not api_key:
178
180
  client.enabled = False
@@ -231,17 +233,18 @@ def _attach_aiqa_processor(provider: TracerProvider) -> None:
231
233
  # Check if already attached
232
234
  for p in provider._active_span_processor._span_processors:
233
235
  if isinstance(getattr(p, "exporter", None), AIQASpanExporter):
234
- logger.debug("AIQA span processor already attached, skipping")
236
+ logger.debug(f"AIQA span processor already attached, skipping")
235
237
  return
236
238
 
237
239
  exporter = AIQASpanExporter(
238
240
  server_url=os.getenv("AIQA_SERVER_URL"),
239
241
  api_key=os.getenv("AIQA_API_KEY"),
242
+ # max_buffer_spans will be read from AIQA_MAX_BUFFER_SPANS env var by the exporter
240
243
  )
241
244
  provider.add_span_processor(BatchSpanProcessor(exporter))
242
245
  global client
243
246
  client.exporter = exporter
244
- logger.debug("AIQA span processor attached successfully")
247
+ logger.debug(f"AIQA span processor attached successfully")
245
248
  except Exception as e:
246
249
  logger.error(f"Error attaching AIQA span processor: {e}")
247
250
  # Re-raise to let _init_tracing handle it - it will log and continue
aiqa/constants.py CHANGED
@@ -3,4 +3,6 @@ Constants used across the AIQA client package.
3
3
  """
4
4
 
5
5
  AIQA_TRACER_NAME = "aiqa-tracer"
6
- VERSION = "0.4.1" # automatically updated by set-version-json.sh
6
+ VERSION = "0.4.7" # automatically updated by set-version-json.sh
7
+
8
+ LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
aiqa/experiment_runner.py CHANGED
@@ -4,6 +4,8 @@ ExperimentRunner - runs experiments on datasets and scores results
4
4
 
5
5
  import os
6
6
  import time
7
+ from .constants import LOG_TAG
8
+ from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
7
9
  from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
8
10
  import requests
9
11
 
@@ -35,18 +37,15 @@ class ExperimentRunner:
35
37
  """
36
38
  self.dataset_id = dataset_id
37
39
  self.experiment_id = experiment_id
38
- self.server_url = (server_url or os.getenv("AIQA_SERVER_URL", "")).rstrip("/")
39
- self.api_key = api_key or os.getenv("AIQA_API_KEY", "")
40
+ self.server_url = get_server_url(server_url)
41
+ self.api_key = get_api_key(api_key)
40
42
  self.organisation = organisation_id
41
43
  self.experiment: Optional[Dict[str, Any]] = None
42
44
  self.scores: List[Dict[str, Any]] = []
43
45
 
44
46
  def _get_headers(self) -> Dict[str, str]:
45
47
  """Build HTTP headers for API requests."""
46
- headers = {"Content-Type": "application/json"}
47
- if self.api_key:
48
- headers["Authorization"] = f"ApiKey {self.api_key}"
49
- return headers
48
+ return build_headers(self.api_key)
50
49
 
51
50
  def get_dataset(self) -> Dict[str, Any]:
52
51
  """
@@ -61,10 +60,7 @@ class ExperimentRunner:
61
60
  )
62
61
 
63
62
  if not response.ok:
64
- error_text = response.text if hasattr(response, "text") else "Unknown error"
65
- raise Exception(
66
- f"Failed to fetch dataset: {response.status_code} {response.reason} - {error_text}"
67
- )
63
+ raise Exception(format_http_error(response, "fetch dataset"))
68
64
 
69
65
  return response.json()
70
66
 
@@ -92,10 +88,7 @@ class ExperimentRunner:
92
88
  )
93
89
 
94
90
  if not response.ok:
95
- error_text = response.text if hasattr(response, "text") else "Unknown error"
96
- raise Exception(
97
- f"Failed to fetch example inputs: {response.status_code} {response.reason} - {error_text}"
98
- )
91
+ raise Exception(format_http_error(response, "fetch example inputs"))
99
92
 
100
93
  data = response.json()
101
94
  return data.get("hits", [])
@@ -130,7 +123,7 @@ class ExperimentRunner:
130
123
  "summary_results": {},
131
124
  }
132
125
 
133
- print("Creating experiment")
126
+ print(f"Creating experiment")
134
127
  response = requests.post(
135
128
  f"{self.server_url}/experiment",
136
129
  json=experiment_setup,
@@ -138,10 +131,7 @@ class ExperimentRunner:
138
131
  )
139
132
 
140
133
  if not response.ok:
141
- error_text = response.text if hasattr(response, "text") else "Unknown error"
142
- raise Exception(
143
- f"Failed to create experiment: {response.status_code} {response.reason} - {error_text}"
144
- )
134
+ raise Exception(format_http_error(response, "create experiment"))
145
135
 
146
136
  experiment = response.json()
147
137
  self.experiment_id = experiment["id"]
@@ -186,10 +176,7 @@ class ExperimentRunner:
186
176
  )
187
177
 
188
178
  if not response.ok:
189
- error_text = response.text if hasattr(response, "text") else "Unknown error"
190
- raise Exception(
191
- f"Failed to score and store: {response.status_code} {response.reason} - {error_text}"
192
- )
179
+ raise Exception(format_http_error(response, "score and store"))
193
180
 
194
181
  json_result = response.json()
195
182
  print(f"scoreAndStore response: {json_result}")
@@ -270,8 +257,7 @@ class ExperimentRunner:
270
257
  input_data = example["spans"][0].get("attributes", {}).get("input")
271
258
 
272
259
  if not input_data:
273
- print(
274
- f"Warning: Example has no input field or spans with input attribute: {example}"
260
+ print(f"Warning: Example has no input field or spans with input attribute: {example}"
275
261
  )
276
262
  # Run engine anyway -- this could make sense if it's all about the parameters
277
263
 
@@ -326,10 +312,7 @@ class ExperimentRunner:
326
312
  )
327
313
 
328
314
  if not response.ok:
329
- error_text = response.text if hasattr(response, "text") else "Unknown error"
330
- raise Exception(
331
- f"Failed to fetch summary results: {response.status_code} {response.reason} - {error_text}"
332
- )
315
+ raise Exception(format_http_error(response, "fetch summary results"))
333
316
 
334
317
  experiment2 = response.json()
335
318
  return experiment2.get("summary_results", {})
aiqa/http_utils.py ADDED
@@ -0,0 +1,69 @@
1
+ """
2
+ Shared HTTP utilities for AIQA client.
3
+ Provides common functions for building headers, handling errors, and accessing environment variables.
4
+ """
5
+
6
+ import os
7
+ from typing import Dict, Optional
8
+
9
+
10
+ def build_headers(api_key: Optional[str] = None) -> Dict[str, str]:
11
+ """
12
+ Build HTTP headers for AIQA API requests.
13
+
14
+ Args:
15
+ api_key: Optional API key. If not provided, will try to get from AIQA_API_KEY env var.
16
+
17
+ Returns:
18
+ Dictionary with Content-Type and optionally Authorization header.
19
+ """
20
+ headers = {"Content-Type": "application/json"}
21
+ if api_key:
22
+ headers["Authorization"] = f"ApiKey {api_key}"
23
+ elif os.getenv("AIQA_API_KEY"):
24
+ headers["Authorization"] = f"ApiKey {os.getenv('AIQA_API_KEY')}"
25
+ return headers
26
+
27
+
28
+ def get_server_url(server_url: Optional[str] = None) -> str:
29
+ """
30
+ Get server URL from parameter or environment variable, with trailing slash removed.
31
+
32
+ Args:
33
+ server_url: Optional server URL. If not provided, will get from AIQA_SERVER_URL env var.
34
+
35
+ Returns:
36
+ Server URL with trailing slash removed, or empty string if not set.
37
+ """
38
+ url = server_url or os.getenv("AIQA_SERVER_URL", "")
39
+ return url.rstrip("/")
40
+
41
+
42
+ def get_api_key(api_key: Optional[str] = None) -> str:
43
+ """
44
+ Get API key from parameter or environment variable.
45
+
46
+ Args:
47
+ api_key: Optional API key. If not provided, will get from AIQA_API_KEY env var.
48
+
49
+ Returns:
50
+ API key or empty string if not set.
51
+ """
52
+ return api_key or os.getenv("AIQA_API_KEY", "")
53
+
54
+
55
+ def format_http_error(response, operation: str) -> str:
56
+ """
57
+ Format an HTTP error message from a response object.
58
+
59
+ Args:
60
+ response: Response object with status_code, reason, and text attributes
61
+ operation: Description of the operation that failed (e.g., "fetch dataset")
62
+
63
+ Returns:
64
+ Formatted error message string.
65
+ """
66
+ error_text = response.text if hasattr(response, "text") else "Unknown error"
67
+ status_code = getattr(response, "status_code", getattr(response, "status", "unknown"))
68
+ reason = getattr(response, "reason", "")
69
+ return f"Failed to {operation}: {status_code} {reason} - {error_text}"
aiqa/object_serialiser.py CHANGED
@@ -7,10 +7,36 @@ import json
7
7
  import os
8
8
  import dataclasses
9
9
  import logging
10
+ from .constants import LOG_TAG
10
11
  from datetime import datetime, date, time
11
12
  from typing import Any, Callable, Set
13
+ from json.encoder import JSONEncoder
12
14
 
13
- logger = logging.getLogger("aiqa")
15
+ logger = logging.getLogger(LOG_TAG)
16
+
17
+ def sanitize_string_for_utf8(text: str) -> str:
18
+ """
19
+ Sanitize a string to remove surrogate characters that can't be encoded to UTF-8.
20
+ Surrogate characters (U+D800 to U+DFFF) are invalid in UTF-8 and can cause encoding errors.
21
+
22
+ Args:
23
+ text: The string to sanitize
24
+
25
+ Returns:
26
+ A string with each unencodable surrogate replaced by '?' (the substitute str.encode(errors='replace') emits when encoding; not U+FFFD)
27
+ """
28
+ if text == None:
29
+ return None
30
+ if not isinstance(text, str): # paranoia
31
+ text = str(text)
32
+ try:
33
+ # Try encoding to UTF-8 to check if there are any issues
34
+ text.encode('utf-8')
35
+ return text
36
+ except UnicodeEncodeError:
37
+ # If encoding fails, replace surrogates with replacement character
38
+ # This handles surrogates that can't be encoded
39
+ return text.encode('utf-8', errors='replace').decode('utf-8', errors='replace')
14
40
 
15
41
  def toNumber(value: str|int|None) -> int:
16
42
  """Convert string to number. handling units like g, m, k, (also mb kb gb though these should be avoided)"""
@@ -105,7 +131,7 @@ def serialize_for_span(value: Any) -> Any:
105
131
  """
106
132
  Serialize a value for span attributes.
107
133
  OpenTelemetry only accepts primitives (bool, str, bytes, int, float) or sequences of those.
108
- Complex types (dicts, lists, objects) are converted to JSON strings.
134
+ Complex types (dicts, objects) are converted to JSON strings.
109
135
 
110
136
  Handles objects by attempting to convert them to dicts, with safeguards against:
111
137
  - Circular references
@@ -118,14 +144,17 @@ def serialize_for_span(value: Any) -> Any:
118
144
 
119
145
  # For sequences, check if all elements are primitives
120
146
  if isinstance(value, (list, tuple)):
121
- # If all elements are primitives, return as list
122
- if all(isinstance(item, (str, int, float, bool, bytes, type(None))) for item in value):
123
- return list(value)
124
- # Otherwise serialize to JSON string
125
- try:
126
- return safe_json_dumps(value)
127
- except Exception:
128
- return str(value)
147
+ # Explicit loop equivalent to the previous all() check (note: all() also short-circuits)
148
+ # Only iterate until we find a non-primitive
149
+ for item in value:
150
+ if not isinstance(item, (str, int, float, bool, bytes, type(None))):
151
+ # Found non-primitive, serialize to JSON string
152
+ try:
153
+ return safe_json_dumps(value)
154
+ except Exception:
155
+ return str(value)
156
+ # All elements are primitives, return as list
157
+ return list(value)
129
158
 
130
159
  # For dicts and other complex types, serialize to JSON string
131
160
  try:
@@ -140,10 +169,13 @@ def safe_str_repr(value: Any) -> str:
140
169
  Safely convert a value to string representation.
141
170
  Handles objects with __repr__ that might raise exceptions.
142
171
  Uses AIQA_MAX_OBJECT_STR_CHARS environment variable (default: 100000) to limit length.
172
+ Also sanitizes surrogate characters to prevent UTF-8 encoding errors.
143
173
  """
144
174
  try:
145
175
  # Try __repr__ first (usually more informative)
146
176
  repr_str = repr(value)
177
+ # Sanitize surrogate characters that can't be encoded to UTF-8
178
+ repr_str = sanitize_string_for_utf8(repr_str)
147
179
  # Limit length to avoid huge strings
148
180
  if len(repr_str) > AIQA_MAX_OBJECT_STR_CHARS:
149
181
  return repr_str[:AIQA_MAX_OBJECT_STR_CHARS] + "... (truncated)"
@@ -158,7 +190,7 @@ def safe_str_repr(value: Any) -> str:
158
190
 
159
191
  def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_depth: int = 0) -> Any:
160
192
  """
161
- Convert an object to a dictionary representation.
193
+ Convert an object to a dictionary representation. Applies data filters to the object.
162
194
 
163
195
  Args:
164
196
  obj: The object to convert
@@ -172,7 +204,7 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
172
204
  if current_depth > max_depth:
173
205
  return "<max depth exceeded>"
174
206
 
175
- obj_id = id(obj)
207
+ obj_id = id(obj) # note: id cannot raise exception
176
208
  if obj_id in visited:
177
209
  return "<circular reference>"
178
210
 
@@ -185,53 +217,42 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
185
217
  return obj
186
218
 
187
219
  # Handle datetime objects
188
- if isinstance(obj, datetime):
189
- return obj.isoformat()
190
- if isinstance(obj, date):
191
- return obj.isoformat()
192
- if isinstance(obj, time):
193
- return obj.isoformat()
220
+ if isinstance(obj, datetime) or isinstance(obj, date) or isinstance(obj, time):
221
+ try:
222
+ return obj.isoformat()
223
+ except Exception: # paranoia if isoformat() fails (e.g., invalid datetime state, custom implementation bug)
224
+ return safe_str_repr(obj)
194
225
 
195
226
  # Handle dict
196
227
  if isinstance(obj, dict):
197
228
  visited.add(obj_id)
198
- try:
199
- result = {}
200
- for k, v in obj.items():
201
- try:
202
- key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
203
- filtered_value = _apply_data_filters(key_str, v)
204
- result[key_str] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
205
- except Exception as e:
206
- # If one key-value pair fails, log and use string representation for the value
207
- key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
208
- logger.debug(f"Failed to convert dict value for key '{key_str}': {e}")
209
- result[key_str] = safe_str_repr(v)
210
- visited.remove(obj_id)
211
- return result
212
- except Exception as e:
213
- visited.discard(obj_id)
214
- logger.debug(f"Failed to convert dict to dict: {e}")
215
- return safe_str_repr(obj)
229
+ result = {}
230
+ for k, v in obj.items():
231
+ try:
232
+ key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
233
+ filtered_value = _apply_data_filters(key_str, v)
234
+ result[key_str] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
235
+ except Exception as e:
236
+ # If one key-value pair fails, log and use string representation for the value
237
+ key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
238
+ logger.debug(f"Failed to convert dict value for key '{key_str}': {e}")
239
+ result[key_str] = safe_str_repr(v)
240
+ visited.remove(obj_id)
241
+ return result
216
242
 
217
243
  # Handle list/tuple
218
244
  if isinstance(obj, (list, tuple)):
219
245
  visited.add(obj_id)
220
- try:
221
- result = []
222
- for item in obj:
223
- try:
224
- result.append(object_to_dict(item, visited, max_depth, current_depth + 1))
225
- except Exception as e:
226
- # If one item fails, log and use its string representation
227
- logger.debug(f"Failed to convert list item {type(item).__name__} to dict: {e}")
228
- result.append(safe_str_repr(item))
229
- visited.remove(obj_id)
230
- return result
231
- except Exception as e:
232
- visited.discard(obj_id)
233
- logger.debug(f"Failed to convert list/tuple to dict: {e}")
234
- return safe_str_repr(obj)
246
+ result = []
247
+ for item in obj:
248
+ try:
249
+ result.append(object_to_dict(item, visited, max_depth, current_depth + 1))
250
+ except Exception as e:
251
+ # If one item fails, log and use its string representation
252
+ logger.debug(f"Failed to convert list item {type(item).__name__} to dict: {e}")
253
+ result.append(safe_str_repr(item))
254
+ visited.remove(obj_id)
255
+ return result
235
256
 
236
257
  # Handle dataclasses
237
258
  if dataclasses.is_dataclass(obj):
@@ -258,18 +279,11 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
258
279
  if hasattr(obj, "__dict__"):
259
280
  visited.add(obj_id)
260
281
  try:
261
- result = {}
262
- for key, value in obj.__dict__.items():
263
- # Skip private attributes that start with __
264
- if not (isinstance(key, str) and key.startswith("__")):
265
- filtered_value = _apply_data_filters(key, value)
266
- result[key] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
267
- visited.remove(obj_id)
268
- return result
269
- except Exception as e:
282
+ obj_dict = obj.__dict__
283
+ return object_to_dict(obj_dict, visited, max_depth, current_depth) # Note: Don't count using __dict__ as a recursion depth +1 step
284
+ except Exception as e: # paranoia: object_to_dict should never raise an exception
270
285
  visited.discard(obj_id)
271
- # Log the error for debugging, but still return string representation
272
- logger.debug(f"Failed to convert object {type(obj).__name__} to dict: {e}")
286
+ logger.debug(f"Failed to convert object {type(obj).__name__} with __dict__ to dict: {e}")
273
287
  return safe_str_repr(obj)
274
288
 
275
289
  # Handle objects with __slots__
@@ -311,6 +325,36 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
311
325
  return safe_str_repr(obj)
312
326
 
313
327
 
328
+ class SizeLimitedJSONEncoder(JSONEncoder):
329
+ """
330
+ Custom JSON encoder that stops serialization early when max_size_chars is reached.
331
+ Tracks output length incrementally and stops yielding chunks when limit is exceeded.
332
+ """
333
+ def __init__(self, max_size_chars: int, *args, **kwargs):
334
+ super().__init__(*args, **kwargs)
335
+ self.max_size_chars = max_size_chars
336
+ self.current_length = 0
337
+ self._truncated = False
338
+
339
+ def iterencode(self, o, _one_shot=False):
340
+ """
341
+ Encode the object incrementally, checking size after each chunk.
342
+ Stops early if max_size_chars is exceeded.
343
+ """
344
+ self.current_length = 0
345
+ self._truncated = False
346
+
347
+ # Use _one_shot optimization when possible (faster for simple objects)
348
+ # The parent class will determine if _one_shot is safe
349
+ for chunk in super().iterencode(o, _one_shot):
350
+ self.current_length += len(chunk)
351
+ if self.current_length > self.max_size_chars:
352
+ self._truncated = True
353
+ # Stop yielding chunks when limit is exceeded
354
+ break
355
+ yield chunk
356
+
357
+
314
358
  def safe_json_dumps(value: Any) -> str:
315
359
  """
316
360
  Safely serialize a value to JSON string with safeguards against:
@@ -329,68 +373,45 @@ def safe_json_dumps(value: Any) -> str:
329
373
  max_size_chars = AIQA_MAX_OBJECT_STR_CHARS
330
374
  visited: Set[int] = set()
331
375
 
332
- # Convert the entire structure to ensure circular references are detected
376
+ # Convert the entire structure to json-friendly form, and ensure circular references are detected
333
377
  # across the whole object graph
334
378
  try:
335
379
  converted = object_to_dict(value, visited)
336
380
  except Exception as e:
337
- # If conversion fails, try with a fresh visited set and json default handler
338
- logger.debug(f"object_to_dict failed for {type(value).__name__}, trying json.dumps with default handler: {e}")
339
- try:
340
- json_str = json.dumps(value, default=json_default_handler_factory(set()))
341
- if len(json_str) > max_size_chars:
342
- return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}... conversion error: {e}>"
343
- return json_str
344
- except Exception as e2:
345
- logger.debug(f"json.dumps with default handler also failed for {type(value).__name__}: {e2}")
346
- return safe_str_repr(value)
381
+ # Note: object_to_dict is very defensive but can still raise in rare edge cases:
382
+ # - Objects with corrupted type metadata causing isinstance()/hasattr() to fail
383
+ # - Malformed dataclasses causing dataclasses.fields() to raise
384
+ # - Objects where accessing __dict__ or __slots__ triggers descriptors that raise
385
+ logger.debug(f"object_to_dict failed for {type(value).__name__}, using safe_str_repr. Error: {e}")
386
+ return safe_str_repr(value)
347
387
 
348
- # Try JSON serialization of the converted structure
388
+ # Try JSON serialization of the converted structure with size-limited encoder
389
+ # After object_to_dict(), converted is a plain dict/list with circular refs already
390
+ # converted to "<circular reference>" strings. We use check_circular=True (default)
391
+ # as an additional safety net, though it's redundant since object_to_dict() already
392
+ # handled circular refs. We don't need a default handler here since converted
393
+ # should be JSON-serializable.
349
394
  try:
350
- json_str = json.dumps(converted, default=json_default_handler_factory(set()))
351
- # Check size
352
- if len(json_str) > max_size_chars:
395
+ encoder = SizeLimitedJSONEncoder(
396
+ max_size_chars=max_size_chars,
397
+ check_circular=True, # Safety net for dict/list circular refs (redundant but harmless)
398
+ ensure_ascii=False
399
+ )
400
+ # Use iterencode to get chunks and check size incrementally
401
+ chunks = []
402
+ for chunk in encoder.iterencode(converted, _one_shot=True):
403
+ chunks.append(chunk)
404
+ if encoder._truncated:
405
+ # Hit the limit, stop early
406
+ json_str = ''.join(chunks)
407
+ return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
408
+ json_str = ''.join(chunks)
409
+ # Check if truncation occurred (encoder may have stopped after last chunk)
410
+ if encoder._truncated or len(json_str) > max_size_chars:
353
411
  return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
354
412
  return json_str
355
413
  except Exception as e:
356
- logger.debug(f"json.dumps total fail for {type(value).__name__}: {e2}")
414
+ logger.debug(f"json.dumps total fail for {type(value).__name__}: {e}")
357
415
  # Final fallback
358
416
  return safe_str_repr(value)
359
417
 
360
-
361
- def json_default_handler_factory(visited: Set[int]) -> Callable[[Any], Any]:
362
- """
363
- Create a JSON default handler with a shared visited set for circular reference detection.
364
- """
365
- def handler(obj: Any) -> Any:
366
- # Handle datetime objects
367
- if isinstance(obj, datetime):
368
- return obj.isoformat()
369
- if isinstance(obj, date):
370
- return obj.isoformat()
371
- if isinstance(obj, time):
372
- return obj.isoformat()
373
-
374
- # Handle bytes
375
- if isinstance(obj, bytes):
376
- try:
377
- return obj.decode('utf-8')
378
- except UnicodeDecodeError:
379
- return f"<bytes: {len(obj)} bytes>"
380
-
381
- # Try object conversion with the shared visited set
382
- try:
383
- return object_to_dict(obj, visited)
384
- except Exception:
385
- return safe_str_repr(obj)
386
-
387
- return handler
388
-
389
-
390
- def json_default_handler(obj: Any) -> Any:
391
- """
392
- Default handler for JSON serialization of non-serializable objects.
393
- This is a fallback that creates its own visited set.
394
- """
395
- return json_default_handler_factory(set())(obj)
396
-