aiqa-client 0.4.3__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/aiqa_exporter.py +192 -99
- aiqa/client.py +13 -10
- aiqa/constants.py +3 -1
- aiqa/experiment_runner.py +12 -29
- aiqa/http_utils.py +69 -0
- aiqa/object_serialiser.py +136 -115
- aiqa/tracing.py +113 -253
- aiqa/tracing_llm_utils.py +191 -0
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.4.7.dist-info}/METADATA +1 -1
- aiqa_client-0.4.7.dist-info/RECORD +15 -0
- aiqa/test_experiment_runner.py +0 -176
- aiqa/test_startup_reliability.py +0 -249
- aiqa/test_tracing.py +0 -230
- aiqa_client-0.4.3.dist-info/RECORD +0 -16
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.4.7.dist-info}/WHEEL +0 -0
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.4.7.dist-info}/licenses/LICENSE.txt +0 -0
- {aiqa_client-0.4.3.dist-info → aiqa_client-0.4.7.dist-info}/top_level.txt +0 -0
aiqa/client.py
CHANGED
@@ -7,7 +7,9 @@ from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor

-
+from .constants import AIQA_TRACER_NAME, LOG_TAG
+
+logger = logging.getLogger(LOG_TAG)

 # Compatibility import for TraceIdRatioBased sampler
 # In older OpenTelemetry versions it was TraceIdRatioBasedSampler
@@ -20,7 +22,7 @@ except ImportError:
 from opentelemetry.sdk.trace.sampling import TraceIdRatioBasedSampler as TraceIdRatioBased
 except ImportError:
 logger.warning(
-"Could not import TraceIdRatioBased or TraceIdRatioBasedSampler from "
+f"Could not import TraceIdRatioBased or TraceIdRatioBasedSampler from "
 "opentelemetry.sdk.trace.sampling. AIQA tracing may not work correctly. "
 "Please ensure opentelemetry-sdk>=1.24.0 is installed. "
 "Try: pip install --upgrade opentelemetry-sdk"
@@ -28,7 +30,7 @@ except ImportError:
 # Set to None so we can check later
 TraceIdRatioBased = None

-from .
+from .http_utils import get_server_url, get_api_key

 class AIQAClient:
 """
@@ -93,7 +95,7 @@ class AIQAClient:
 This will also set enabled=False to prevent further tracing attempts.
 """
 try:
-logger.info("AIQA tracing shutting down")
+logger.info(f"AIQA tracing shutting down")
 # Disable tracing to prevent attempts to use shut-down system
 self.enabled = False
 if self._provider:
@@ -150,7 +152,7 @@ def get_aiqa_client() -> AIQAClient:
 # Optional: Initialize explicitly (usually not needed)
 client = get_aiqa_client()
 if client.enabled:
-print("Tracing is enabled")
+print(f"Tracing is enabled")

 @WithTracing
 def my_function():
@@ -161,7 +163,7 @@ def get_aiqa_client() -> AIQAClient:
 _init_tracing()
 except Exception as e:
 logger.error(f"Failed to initialize AIQA tracing: {e}")
-logger.warning("AIQA tracing is disabled. Your application will continue to run without tracing.")
+logger.warning(f"AIQA tracing is disabled. Your application will continue to run without tracing.")
 return client

 def _init_tracing() -> None:
@@ -171,8 +173,8 @@ def _init_tracing() -> None:
 return

 try:
-server_url =
-api_key =
+server_url = get_server_url()
+api_key = get_api_key()

 if not server_url or not api_key:
 client.enabled = False
@@ -231,17 +233,18 @@ def _attach_aiqa_processor(provider: TracerProvider) -> None:
 # Check if already attached
 for p in provider._active_span_processor._span_processors:
 if isinstance(getattr(p, "exporter", None), AIQASpanExporter):
-logger.debug("AIQA span processor already attached, skipping")
+logger.debug(f"AIQA span processor already attached, skipping")
 return

 exporter = AIQASpanExporter(
 server_url=os.getenv("AIQA_SERVER_URL"),
 api_key=os.getenv("AIQA_API_KEY"),
+# max_buffer_spans will be read from AIQA_MAX_BUFFER_SPANS env var by the exporter
 )
 provider.add_span_processor(BatchSpanProcessor(exporter))
 global client
 client.exporter = exporter
-logger.debug("AIQA span processor attached successfully")
+logger.debug(f"AIQA span processor attached successfully")
 except Exception as e:
 logger.error(f"Error attaching AIQA span processor: {e}")
 # Re-raise to let _init_tracing handle it - it will log and continue
aiqa/constants.py
CHANGED
@@ -3,4 +3,6 @@ Constants used across the AIQA client package.
 """

 AIQA_TRACER_NAME = "aiqa-tracer"
-VERSION = "0.4.
+VERSION = "0.4.7" # automatically updated by set-version-json.sh
+
+LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
aiqa/experiment_runner.py
CHANGED
@@ -4,6 +4,8 @@ ExperimentRunner - runs experiments on datasets and scores results

 import os
 import time
+from .constants import LOG_TAG
+from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
 from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
 import requests

@@ -35,18 +37,15 @@ class ExperimentRunner:
 """
 self.dataset_id = dataset_id
 self.experiment_id = experiment_id
-self.server_url = (server_url
-self.api_key = api_key
+self.server_url = get_server_url(server_url)
+self.api_key = get_api_key(api_key)
 self.organisation = organisation_id
 self.experiment: Optional[Dict[str, Any]] = None
 self.scores: List[Dict[str, Any]] = []

 def _get_headers(self) -> Dict[str, str]:
 """Build HTTP headers for API requests."""
-
-if self.api_key:
-headers["Authorization"] = f"ApiKey {self.api_key}"
-return headers
+return build_headers(self.api_key)

 def get_dataset(self) -> Dict[str, Any]:
 """
@@ -61,10 +60,7 @@ class ExperimentRunner:
 )

 if not response.ok:
-
-raise Exception(
-f"Failed to fetch dataset: {response.status_code} {response.reason} - {error_text}"
-)
+raise Exception(format_http_error(response, "fetch dataset"))

 return response.json()

@@ -92,10 +88,7 @@ class ExperimentRunner:
 )

 if not response.ok:
-
-raise Exception(
-f"Failed to fetch example inputs: {response.status_code} {response.reason} - {error_text}"
-)
+raise Exception(format_http_error(response, "fetch example inputs"))

 data = response.json()
 return data.get("hits", [])
@@ -130,7 +123,7 @@ class ExperimentRunner:
 "summary_results": {},
 }

-print("Creating experiment")
+print(f"Creating experiment")
 response = requests.post(
 f"{self.server_url}/experiment",
 json=experiment_setup,
@@ -138,10 +131,7 @@ class ExperimentRunner:
 )

 if not response.ok:
-
-raise Exception(
-f"Failed to create experiment: {response.status_code} {response.reason} - {error_text}"
-)
+raise Exception(format_http_error(response, "create experiment"))

 experiment = response.json()
 self.experiment_id = experiment["id"]
@@ -186,10 +176,7 @@ class ExperimentRunner:
 )

 if not response.ok:
-
-raise Exception(
-f"Failed to score and store: {response.status_code} {response.reason} - {error_text}"
-)
+raise Exception(format_http_error(response, "score and store"))

 json_result = response.json()
 print(f"scoreAndStore response: {json_result}")
@@ -270,8 +257,7 @@ class ExperimentRunner:
 input_data = example["spans"][0].get("attributes", {}).get("input")

 if not input_data:
-print(
-f"Warning: Example has no input field or spans with input attribute: {example}"
+print(f"Warning: Example has no input field or spans with input attribute: {example}"
 )
 # Run engine anyway -- this could make sense if it's all about the parameters

@@ -326,10 +312,7 @@ class ExperimentRunner:
 )

 if not response.ok:
-
-raise Exception(
-f"Failed to fetch summary results: {response.status_code} {response.reason} - {error_text}"
-)
+raise Exception(format_http_error(response, "fetch summary results"))

 experiment2 = response.json()
 return experiment2.get("summary_results", {})
aiqa/http_utils.py
ADDED
@@ -0,0 +1,69 @@
+"""
+Shared HTTP utilities for AIQA client.
+Provides common functions for building headers, handling errors, and accessing environment variables.
+"""
+
+import os
+from typing import Dict, Optional
+
+
+def build_headers(api_key: Optional[str] = None) -> Dict[str, str]:
+"""
+Build HTTP headers for AIQA API requests.
+
+Args:
+api_key: Optional API key. If not provided, will try to get from AIQA_API_KEY env var.
+
+Returns:
+Dictionary with Content-Type and optionally Authorization header.
+"""
+headers = {"Content-Type": "application/json"}
+if api_key:
+headers["Authorization"] = f"ApiKey {api_key}"
+elif os.getenv("AIQA_API_KEY"):
+headers["Authorization"] = f"ApiKey {os.getenv('AIQA_API_KEY')}"
+return headers
+
+
+def get_server_url(server_url: Optional[str] = None) -> str:
+"""
+Get server URL from parameter or environment variable, with trailing slash removed.
+
+Args:
+server_url: Optional server URL. If not provided, will get from AIQA_SERVER_URL env var.
+
+Returns:
+Server URL with trailing slash removed, or empty string if not set.
+"""
+url = server_url or os.getenv("AIQA_SERVER_URL", "")
+return url.rstrip("/")
+
+
+def get_api_key(api_key: Optional[str] = None) -> str:
+"""
+Get API key from parameter or environment variable.
+
+Args:
+api_key: Optional API key. If not provided, will get from AIQA_API_KEY env var.
+
+Returns:
+API key or empty string if not set.
+"""
+return api_key or os.getenv("AIQA_API_KEY", "")
+
+
+def format_http_error(response, operation: str) -> str:
+"""
+Format an HTTP error message from a response object.
+
+Args:
+response: Response object with status_code, reason, and text attributes
+operation: Description of the operation that failed (e.g., "fetch dataset")
+
+Returns:
+Formatted error message string.
+"""
+error_text = response.text if hasattr(response, "text") else "Unknown error"
+status_code = getattr(response, "status_code", getattr(response, "status", "unknown"))
+reason = getattr(response, "reason", "")
+return f"Failed to {operation}: {status_code} {reason} - {error_text}"
aiqa/object_serialiser.py
CHANGED
@@ -7,10 +7,36 @@ import json
 import os
 import dataclasses
 import logging
+from .constants import LOG_TAG
 from datetime import datetime, date, time
 from typing import Any, Callable, Set
+from json.encoder import JSONEncoder

-logger = logging.getLogger(
+logger = logging.getLogger(LOG_TAG)
+
+def sanitize_string_for_utf8(text: str) -> str:
+"""
+Sanitize a string to remove surrogate characters that can't be encoded to UTF-8.
+Surrogate characters (U+D800 to U+DFFF) are invalid in UTF-8 and can cause encoding errors.
+
+Args:
+text: The string to sanitize
+
+Returns:
+A string with surrogate characters replaced by the Unicode replacement character (U+FFFD)
+"""
+if text == None:
+return None
+if not isinstance(text, str): # paranoia
+text = str(text)
+try:
+# Try encoding to UTF-8 to check if there are any issues
+text.encode('utf-8')
+return text
+except UnicodeEncodeError:
+# If encoding fails, replace surrogates with replacement character
+# This handles surrogates that can't be encoded
+return text.encode('utf-8', errors='replace').decode('utf-8', errors='replace')

 def toNumber(value: str|int|None) -> int:
 """Convert string to number. handling units like g, m, k, (also mb kb gb though these should be avoided)"""
@@ -105,7 +131,7 @@ def serialize_for_span(value: Any) -> Any:
 """
 Serialize a value for span attributes.
 OpenTelemetry only accepts primitives (bool, str, bytes, int, float) or sequences of those.
-Complex types (dicts,
+Complex types (dicts, objects) are converted to JSON strings.

 Handles objects by attempting to convert them to dicts, with safeguards against:
 - Circular references
@@ -118,14 +144,17 @@ def serialize_for_span(value: Any) -> Any:

 # For sequences, check if all elements are primitives
 if isinstance(value, (list, tuple)):
-#
-
-
-
-
-
-
-
+# Use short-circuiting loop instead of all() for better performance on large lists
+# Only iterate until we find a non-primitive
+for item in value:
+if not isinstance(item, (str, int, float, bool, bytes, type(None))):
+# Found non-primitive, serialize to JSON string
+try:
+return safe_json_dumps(value)
+except Exception:
+return str(value)
+# All elements are primitives, return as list
+return list(value)

 # For dicts and other complex types, serialize to JSON string
 try:
@@ -140,10 +169,13 @@ def safe_str_repr(value: Any) -> str:
 Safely convert a value to string representation.
 Handles objects with __repr__ that might raise exceptions.
 Uses AIQA_MAX_OBJECT_STR_CHARS environment variable (default: 100000) to limit length.
+Also sanitizes surrogate characters to prevent UTF-8 encoding errors.
 """
 try:
 # Try __repr__ first (usually more informative)
 repr_str = repr(value)
+# Sanitize surrogate characters that can't be encoded to UTF-8
+repr_str = sanitize_string_for_utf8(repr_str)
 # Limit length to avoid huge strings
 if len(repr_str) > AIQA_MAX_OBJECT_STR_CHARS:
 return repr_str[:AIQA_MAX_OBJECT_STR_CHARS] + "... (truncated)"
@@ -158,7 +190,7 @@ def safe_str_repr(value: Any) -> str:

 def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_depth: int = 0) -> Any:
 """
-Convert an object to a dictionary representation.
+Convert an object to a dictionary representation. Applies data filters to the object.

 Args:
 obj: The object to convert
@@ -172,7 +204,7 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
 if current_depth > max_depth:
 return "<max depth exceeded>"

-obj_id = id(obj)
+obj_id = id(obj) # note: id cannot raise exception
 if obj_id in visited:
 return "<circular reference>"

@@ -185,53 +217,42 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
 return obj

 # Handle datetime objects
-if isinstance(obj, datetime):
-
-
-
-
-return obj.isoformat()
+if isinstance(obj, datetime) or isinstance(obj, date) or isinstance(obj, time):
+try:
+return obj.isoformat()
+except Exception: # paranoia if isoformat() fails (e.g., invalid datetime state, custom implementation bug)
+return safe_str_repr(obj)

 # Handle dict
 if isinstance(obj, dict):
 visited.add(obj_id)
-
-
-
-
-
-
-
-
-
-
-
-
-
-return result
-except Exception as e:
-visited.discard(obj_id)
-logger.debug(f"Failed to convert dict to dict: {e}")
-return safe_str_repr(obj)
+result = {}
+for k, v in obj.items():
+try:
+key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
+filtered_value = _apply_data_filters(key_str, v)
+result[key_str] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
+except Exception as e:
+# If one key-value pair fails, log and use string representation for the value
+key_str = str(k) if not isinstance(k, (str, int, float, bool)) else k
+logger.debug(f"Failed to convert dict value for key '{key_str}': {e}")
+result[key_str] = safe_str_repr(v)
+visited.remove(obj_id)
+return result

 # Handle list/tuple
 if isinstance(obj, (list, tuple)):
 visited.add(obj_id)
-
-
-
-
-
-
-
-
-
-
-return result
-except Exception as e:
-visited.discard(obj_id)
-logger.debug(f"Failed to convert list/tuple to dict: {e}")
-return safe_str_repr(obj)
+result = []
+for item in obj:
+try:
+result.append(object_to_dict(item, visited, max_depth, current_depth + 1))
+except Exception as e:
+# If one item fails, log and use its string representation
+logger.debug(f"Failed to convert list item {type(item).__name__} to dict: {e}")
+result.append(safe_str_repr(item))
+visited.remove(obj_id)
+return result

 # Handle dataclasses
 if dataclasses.is_dataclass(obj):
@@ -258,18 +279,11 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
 if hasattr(obj, "__dict__"):
 visited.add(obj_id)
 try:
-
-
-
-if not (isinstance(key, str) and key.startswith("__")):
-filtered_value = _apply_data_filters(key, value)
-result[key] = object_to_dict(filtered_value, visited, max_depth, current_depth + 1)
-visited.remove(obj_id)
-return result
-except Exception as e:
+obj_dict = obj.__dict__
+return object_to_dict(obj_dict, visited, max_depth, current_depth) # Note: Don't count using __dict__ as a recursion depth +1 step
+except Exception as e: # paranoia: object_to_dict should never raise an exception
 visited.discard(obj_id)
-
-logger.debug(f"Failed to convert object {type(obj).__name__} to dict: {e}")
+logger.debug(f"Failed to convert object {type(obj).__name__} with __dict__ to dict: {e}")
 return safe_str_repr(obj)

 # Handle objects with __slots__
@@ -311,6 +325,36 @@ def object_to_dict(obj: Any, visited: Set[int], max_depth: int = 10, current_dep
 return safe_str_repr(obj)


+class SizeLimitedJSONEncoder(JSONEncoder):
+"""
+Custom JSON encoder that stops serialization early when max_size_chars is reached.
+Tracks output length incrementally and stops yielding chunks when limit is exceeded.
+"""
+def __init__(self, max_size_chars: int, *args, **kwargs):
+super().__init__(*args, **kwargs)
+self.max_size_chars = max_size_chars
+self.current_length = 0
+self._truncated = False
+
+def iterencode(self, o, _one_shot=False):
+"""
+Encode the object incrementally, checking size after each chunk.
+Stops early if max_size_chars is exceeded.
+"""
+self.current_length = 0
+self._truncated = False
+
+# Use _one_shot optimization when possible (faster for simple objects)
+# The parent class will determine if _one_shot is safe
+for chunk in super().iterencode(o, _one_shot):
+self.current_length += len(chunk)
+if self.current_length > self.max_size_chars:
+self._truncated = True
+# Stop yielding chunks when limit is exceeded
+break
+yield chunk
+
+
 def safe_json_dumps(value: Any) -> str:
 """
 Safely serialize a value to JSON string with safeguards against:
@@ -329,68 +373,45 @@ def safe_json_dumps(value: Any) -> str:
 max_size_chars = AIQA_MAX_OBJECT_STR_CHARS
 visited: Set[int] = set()

-# Convert the entire structure to ensure circular references are detected
+# Convert the entire structure to json-friendy form, and ensure circular references are detected
 # across the whole object graph
 try:
 converted = object_to_dict(value, visited)
 except Exception as e:
-#
-
-
-
-
-
-return json_str
-except Exception as e2:
-logger.debug(f"json.dumps with default handler also failed for {type(value).__name__}: {e2}")
-return safe_str_repr(value)
+# Note: object_to_dict is very defensive but can still raise in rare edge cases:
+# - Objects with corrupted type metadata causing isinstance()/hasattr() to fail
+# - Malformed dataclasses causing dataclasses.fields() to raise
+# - Objects where accessing __dict__ or __slots__ triggers descriptors that raise
+logger.debug(f"object_to_dict failed for {type(value).__name__}, using safe_str_repr. Error: {e}")
+return safe_str_repr(value)

-# Try JSON serialization of the converted structure
+# Try JSON serialization of the converted structure with size-limited encoder
+# After object_to_dict(), converted is a plain dict/list with circular refs already
+# converted to "<circular reference>" strings. We use check_circular=True (default)
+# as an additional safety net, though it's redundant since object_to_dict() already
+# handled circular refs. We don't need a default handler here since converted
+# should be JSON-serializable.
 try:
-
-
-
+encoder = SizeLimitedJSONEncoder(
+max_size_chars=max_size_chars,
+check_circular=True, # Safety net for dict/list circular refs (redundant but harmless)
+ensure_ascii=False
+)
+# Use iterencode to get chunks and check size incrementally
+chunks = []
+for chunk in encoder.iterencode(converted, _one_shot=True):
+chunks.append(chunk)
+if encoder._truncated:
+# Hit the limit, stop early
+json_str = ''.join(chunks)
+return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
+json_str = ''.join(chunks)
+# Check if truncation occurred (encoder may have stopped after last chunk)
+if encoder._truncated or len(json_str) > max_size_chars:
 return f"<object {type(value)} too large: {len(json_str)} chars (limit: {max_size_chars} chars) begins: {json_str[:100]}...>"
 return json_str
 except Exception as e:
-logger.debug(f"json.dumps total fail for {type(value).__name__}: {
+logger.debug(f"json.dumps total fail for {type(value).__name__}: {e}")
 # Final fallback
 return safe_str_repr(value)

-
-def json_default_handler_factory(visited: Set[int]) -> Callable[[Any], Any]:
-"""
-Create a JSON default handler with a shared visited set for circular reference detection.
-"""
-def handler(obj: Any) -> Any:
-# Handle datetime objects
-if isinstance(obj, datetime):
-return obj.isoformat()
-if isinstance(obj, date):
-return obj.isoformat()
-if isinstance(obj, time):
-return obj.isoformat()
-
-# Handle bytes
-if isinstance(obj, bytes):
-try:
-return obj.decode('utf-8')
-except UnicodeDecodeError:
-return f"<bytes: {len(obj)} bytes>"
-
-# Try object conversion with the shared visited set
-try:
-return object_to_dict(obj, visited)
-except Exception:
-return safe_str_repr(obj)
-
-return handler
-
-
-def json_default_handler(obj: Any) -> Any:
-"""
-Default handler for JSON serialization of non-serializable objects.
-This is a fallback that creates its own visited set.
-"""
-return json_default_handler_factory(set())(obj)
-