aiqa-client 0.6.1__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/client.py CHANGED
@@ -2,10 +2,10 @@
2
2
  import os
3
3
  import logging
4
4
  from functools import lru_cache
5
- from typing import Optional, TYPE_CHECKING, Any, Dict
5
+ from typing import Optional, TYPE_CHECKING, Any, Dict, List
6
6
  from opentelemetry import trace
7
7
  from opentelemetry.sdk.trace import TracerProvider
8
- from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult, SpanExporter as SpanExporterBase
8
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult
9
9
  from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
10
10
  from opentelemetry.sdk.trace import ReadableSpan
11
11
  from opentelemetry.trace import SpanContext
@@ -52,6 +52,8 @@ class AIQAClient:
52
52
  cls._instance._exporter = None # reduce circular import issues by not importing for typecheck here
53
53
  cls._instance._enabled: bool = True
54
54
  cls._instance._initialized: bool = False
55
+ cls._instance._default_ignore_patterns: List[str] = ["_*"] # Default: filter properties starting with '_'
56
+ cls._instance._ignore_recursive: bool = True # Default: recursive filtering enabled
55
57
  return cls._instance
56
58
 
57
59
  @property
@@ -90,6 +92,76 @@ class AIQAClient:
90
92
  logger.info(f"AIQA tracing {'enabled' if value else 'disabled'}")
91
93
  self._enabled = value
92
94
 
95
+ @property
96
+ def default_ignore_patterns(self) -> List[str]:
97
+ """
98
+ Get the default ignore patterns applied to all traced inputs and outputs.
99
+
100
+ Default: ["_*"] (filters properties starting with '_')
101
+
102
+ Returns:
103
+ List of ignore patterns (supports wildcards like "_*")
104
+ """
105
+ return self._default_ignore_patterns.copy()
106
+
107
+ @default_ignore_patterns.setter
108
+ def default_ignore_patterns(self, value: Optional[List[str]]) -> None:
109
+ """
110
+ Set the default ignore patterns applied to all traced inputs and outputs.
111
+
112
+ Args:
113
+ value: List of patterns to ignore (e.g., ["_*", "password"]).
114
+ Set to None or [] to disable default ignore patterns.
115
+ Supports wildcards (e.g., "_*" matches "_apple", "_fruit").
116
+
117
+ Example:
118
+ from aiqa import get_aiqa_client
119
+
120
+ client = get_aiqa_client()
121
+ # Add password to default ignore patterns
122
+ client.default_ignore_patterns = ["_*", "password", "api_key"]
123
+ # Disable default ignore patterns
124
+ client.default_ignore_patterns = []
125
+ """
126
+ if value is None:
127
+ self._default_ignore_patterns = []
128
+ else:
129
+ self._default_ignore_patterns = list(value)
130
+ logger.info(f"Default ignore patterns set to: {self._default_ignore_patterns}")
131
+
132
+ @property
133
+ def ignore_recursive(self) -> bool:
134
+ """
135
+ Get whether ignore patterns are applied recursively to nested objects.
136
+
137
+ Default: True (recursive filtering enabled)
138
+
139
+ Returns:
140
+ True if recursive filtering is enabled, False otherwise
141
+ """
142
+ return self._ignore_recursive
143
+
144
+ @ignore_recursive.setter
145
+ def ignore_recursive(self, value: bool) -> None:
146
+ """
147
+ Set whether ignore patterns are applied recursively to nested objects.
148
+
149
+ When True (default), ignore patterns are applied at all nesting levels.
150
+ When False, ignore patterns are only applied to top-level keys.
151
+
152
+ Args:
153
+ value: True to enable recursive filtering, False to disable
154
+
155
+ Example:
156
+ from aiqa import get_aiqa_client
157
+
158
+ client = get_aiqa_client()
159
+ # Disable recursive filtering (only filter top-level keys)
160
+ client.ignore_recursive = False
161
+ """
162
+ self._ignore_recursive = bool(value)
163
+ logger.info(f"Ignore recursive filtering {'enabled' if self._ignore_recursive else 'disabled'}")
164
+
93
165
  def shutdown(self) -> None:
94
166
  """
95
167
  Shutdown the tracer provider and exporter.
@@ -245,8 +317,6 @@ def _attach_aiqa_processor(provider: TracerProvider) -> None:
245
317
  auth_headers = {}
246
318
  if api_key:
247
319
  auth_headers["Authorization"] = f"ApiKey {api_key}"
248
- elif os.getenv("AIQA_API_KEY"):
249
- auth_headers["Authorization"] = f"ApiKey {os.getenv('AIQA_API_KEY')}"
250
320
 
251
321
  # OTLP HTTP exporter requires the full endpoint URL including /v1/traces
252
322
  # Ensure server_url doesn't have trailing slash or /v1/traces, then append /v1/traces
aiqa/constants.py CHANGED
@@ -3,6 +3,6 @@ Constants used across the AIQA client package.
3
3
  """
4
4
 
5
5
  AIQA_TRACER_NAME = "aiqa-tracer"
6
- VERSION = "0.6.1" # automatically updated by set-version-json.sh
6
+ VERSION = "0.7.2" # automatically updated by set-version-json.sh
7
7
 
8
8
  LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
aiqa/experiment_runner.py CHANGED
@@ -5,11 +5,16 @@ ExperimentRunner - runs experiments on datasets and scores results
5
5
  import os
6
6
  import time
7
7
  import asyncio
8
+ from opentelemetry import context as otel_context
9
+ from opentelemetry.trace import Status, StatusCode, set_span_in_context
8
10
  from .constants import LOG_TAG
9
11
  from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
10
12
  from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
11
13
  from .tracing import WithTracing
12
- from .span_helpers import set_span_attribute, flush_tracing
14
+ from .span_helpers import set_span_attribute, flush_tracing, get_active_trace_id
15
+ from .client import get_aiqa_client, get_aiqa_tracer, get_component_tag
16
+ from .object_serialiser import serialize_for_span
17
+ from .tracing_llm_utils import _extract_and_set_token_usage, _extract_and_set_provider_and_model
13
18
  from .llm_as_judge import score_llm_metric_local, get_model_from_server, call_llm_fallback
14
19
  import requests
15
20
  from .types import MetricResult, ScoreThisInputOutputMetricType, Example, Result, Metric, CallLLMType
@@ -25,31 +30,9 @@ CallMyCodeType = Callable[[Any, Dict[str, Any]], Union[Any, Awaitable[Any]]]
25
30
  ScoreThisOutputType = Callable[[Any, Any, Dict[str, Any], Dict[str, Any]], Awaitable[Dict[str, Any]]]
26
31
 
27
32
 
28
-
29
- def _filter_input_for_run(input_data: Any) -> Dict[str, Any]:
30
- """Tracing:Filter input - drop most, keep just ids"""
31
- if not isinstance(input_data, dict):
32
- return {}
33
- self_obj = input_data.get("self")
34
- if not self_obj:
35
- return {}
36
- return {
37
- "dataset": getattr(self_obj, "dataset_id", None),
38
- "experiment": getattr(self_obj, "experiment_id", None),
39
- }
40
-
41
-
42
- def _filter_input_for_run_example(
43
- self: "ExperimentRunner",
44
- example: Dict[str, Any],
45
- call_my_code: Any = None,
46
- score_this_output: Any = None,
47
- ) -> Dict[str, Any]:
48
- """Filter input for run_example method to extract dataset, experiment, and example IDs."""
49
- result = _filter_input_for_run({"self": self})
50
- if isinstance(example, dict):
51
- result["example"] = example.get("id")
52
- return result
33
+ def _metric_score_key(metric: Dict[str, Any]) -> str:
34
+ """Key for scores in API: server expects metric name (fallback to id)."""
35
+ return (metric.get("name") or metric.get("id")) or ""
53
36
 
54
37
 
55
38
  class ExperimentRunner:
@@ -123,7 +106,17 @@ class ExperimentRunner:
123
106
 
124
107
  return dataset
125
108
 
126
- def get_example_inputs(self, limit: int = 10000) -> List[Dict[str, Any]]:
109
+ def get_example(self, example_id: str) -> Dict[str, Any]:
110
+ """
111
+ Fetch an example by ID.
112
+ """
113
+ response = requests.get(
114
+ f"{self.server_url}/example/{example_id}",
115
+ headers=self._get_headers(),
116
+ )
117
+ return response.json()
118
+
119
+ def get_examples_for_dataset(self, limit: int = 10000) -> List[Dict[str, Any]]:
127
120
  """
128
121
  Fetch example inputs from the dataset.
129
122
 
@@ -134,7 +127,7 @@ class ExperimentRunner:
134
127
  List of example objects
135
128
  """
136
129
  params = {
137
- "dataset_id": self.dataset_id,
130
+ "dataset": self.dataset_id,
138
131
  "limit": str(limit),
139
132
  }
140
133
  if self.organisation:
@@ -162,7 +155,6 @@ class ExperimentRunner:
162
155
  experiment_setup: Optional setup for the experiment object. You may wish to set:
163
156
  - name (recommended for labelling the experiment)
164
157
  - parameters
165
- - comparison_parameters
166
158
 
167
159
  Returns:
168
160
  The created experiment object
@@ -184,7 +176,7 @@ class ExperimentRunner:
184
176
  "organisation": self.organisation,
185
177
  "dataset": self.dataset_id,
186
178
  "results": [],
187
- "summary_results": {},
179
+ "summaries": {},
188
180
  }
189
181
 
190
182
  print(f"Creating experiment")
@@ -207,6 +199,7 @@ class ExperimentRunner:
207
199
  example: Example,
208
200
  output: Any,
209
201
  result: Result,
202
+ trace_id: Optional[str] = None,
210
203
  ) -> Result:
211
204
  """
212
205
  Ask the server to score an example result. Stores the score for later summary calculation.
@@ -226,24 +219,20 @@ class ExperimentRunner:
226
219
  if not example_id:
227
220
  raise ValueError("Example must have an 'id' field")
228
221
  if result is None:
229
- example_id = example.get("id")
230
- if not example_id:
231
- raise ValueError("Example must have an 'id' field")
232
- result = Result(exampleId=example_id, scores={}, messages={}, errors={})
222
+ result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
233
223
  scores = result.get("scores") or {}
234
-
235
-
236
224
 
237
225
  print(f"Scoring and storing example: {example_id}")
238
226
  print(f"Scores: {scores}")
239
227
 
240
228
  # Run synchronous requests.post in a thread pool to avoid blocking
229
+ # Server expects output = raw output to score, not the result dict; scores keyed by metric name
241
230
  def _do_request():
242
231
  return requests.post(
243
232
  f"{self.server_url}/experiment/{self.experiment_id}/example/{example_id}/scoreAndStore",
244
233
  json={
245
- "output": result,
246
- "traceId": example.get("traceId"),
234
+ "output": output,
235
+ "trace": trace_id,
247
236
  "scores": scores,
248
237
  },
249
238
  headers=self._get_headers(),
@@ -258,7 +247,6 @@ class ExperimentRunner:
258
247
  print(f"scoreAndStore response: {json_result}")
259
248
  return json_result
260
249
 
261
- @WithTracing(filter_input=_filter_input_for_run)
262
250
  async def run(
263
251
  self,
264
252
  call_my_code: CallMyCodeType,
@@ -271,19 +259,11 @@ class ExperimentRunner:
271
259
  engine: Function that takes input, returns output (can be async)
272
260
  scorer: Optional function that scores the output given the example
273
261
  """
274
- examples = self.get_example_inputs()
275
-
276
- # Wrap engine to match run_example signature (input, parameters)
277
- async def wrapped_engine(input_data, parameters):
278
- result = call_my_code(input_data, parameters)
279
- # Handle async functions
280
- if hasattr(result, "__await__"):
281
- result = await result
282
- return result
262
+ examples = self.get_examples_for_dataset()
283
263
 
284
264
  for example in examples:
285
265
  try:
286
- scores = await self.run_example(example, wrapped_engine, scorer_for_metric_id)
266
+ scores = await self.run_example(example, call_my_code, scorer_for_metric_id)
287
267
  if scores:
288
268
  self.scores.append(
289
269
  {
@@ -296,7 +276,6 @@ class ExperimentRunner:
296
276
  print(f"Error processing example {example.get('id', 'unknown')}: {e}")
297
277
  # Continue with next example instead of failing entire run
298
278
 
299
- @WithTracing(filter_input=_filter_input_for_run_example)
300
279
  async def run_example(
301
280
  self,
302
281
  example: Example,
@@ -304,8 +283,10 @@ class ExperimentRunner:
304
283
  scorer_for_metric_id: Optional[Dict[str, ScoreThisInputOutputMetricType]] = None,
305
284
  ) -> List[Result]:
306
285
  """
307
- Run the engine on an example with the given parameters (looping over comparison parameters),
308
- and score the result. Also calls scoreAndStore to store the result in the server.
286
+ Run the engine on an example with the experiment's parameters, score the result, and store it.
287
+
288
+ Spans: one root "RunExample" span (input, call_my_code, output) and one child "ScoreExample"
289
+ span for scoring, so the server sees a clear call_my_code vs scoring split (aligned with client-go).
309
290
 
310
291
  Args:
311
292
  example: The example to run. See Example.ts type
@@ -313,117 +294,94 @@ class ExperimentRunner:
313
294
  scorer_for_metric_id: Optional dictionary of metric IDs to functions that score the output given the example and parameters
314
295
 
315
296
  Returns:
316
- One set of scores for each comparison parameter set. If no comparison parameters,
317
- returns an array of one.
297
+ List of one result (for API compatibility).
318
298
  """
319
- # Ensure experiment exists
320
299
  if not self.experiment:
321
300
  self.create_experiment()
322
301
  if not self.experiment:
323
302
  raise Exception("Failed to create experiment")
324
303
 
325
- # Make the parameters
326
- parameters_fixed = self.experiment.get("parameters") or {}
327
- # If comparison_parameters is empty/undefined, default to [{}] so we run at least once
328
- parameters_loop = self.experiment.get("comparison_parameters") or [{}]
329
-
330
- # Handle both spans array and input field
304
+ parameters_here = self.experiment.get("parameters") or {}
331
305
  input_data = example.get("input")
332
306
  if not input_data and example.get("spans") and len(example["spans"]) > 0:
333
307
  input_data = example["spans"][0].get("attributes", {}).get("input")
334
-
335
308
  if not input_data:
336
- print(f"Warning: Example has no input field or spans with input attribute: {example}"
337
- )
338
- # Run engine anyway -- this could make sense if it's all about the parameters
309
+ print(f"Warning: Example has no input field or spans with input attribute: {example}")
339
310
 
340
- # Set example.id on the root span (created by @WithTracing decorator)
341
- # This ensures the root span from the trace has example=Example.id set
342
311
  example_id = example.get("id")
343
312
  if not example_id:
344
313
  raise ValueError("Example must have an 'id' field")
345
- set_span_attribute("example", example_id)
346
-
347
- all_scores: List[Dict[str, Any]] = []
348
- dataset_metrics = self.get_dataset().get("metrics", [])
349
- specific_metrics = example.get("metrics", [])
350
- metrics = [*dataset_metrics, *specific_metrics]
351
- # This loop should not be parallelized - it should run sequentially, one after the other
352
- # to avoid creating interference between the runs.
353
- for parameters in parameters_loop:
354
- parameters_here = {**parameters_fixed, **parameters}
355
- print(f"Running with parameters: {parameters_here}")
356
-
357
- # Save original env var values for cleanup
358
- original_env_vars: Dict[str, Optional[str]] = {}
359
- # Set env vars from parameters_here
360
- for key, value in parameters_here.items():
361
- if value:
362
- original_env_vars[key] = os.environ.get(key)
363
- os.environ[key] = str(value)
364
314
 
365
- try:
366
- start = time.time() * 1000 # milliseconds
367
- output = call_my_code(input_data, parameters_here)
315
+ print(f"Running with parameters: {parameters_here}")
316
+ original_env_vars: Dict[str, Optional[str]] = {}
317
+ for key, value in parameters_here.items():
318
+ if value:
319
+ original_env_vars[key] = os.environ.get(key)
320
+ os.environ[key] = str(value)
321
+ try:
322
+ start = time.time() * 1000
323
+
324
+ run_trace_id_ref: List[Optional[str]] = [None]
325
+
326
+ # Wrap engine to match run_example signature (input, parameters)
327
+ # Root span so server can find it by parent:unset; trace ID is sent to scoreAndStore
328
+ def set_trace_id(tid: Optional[str]) -> None:
329
+ run_trace_id_ref[0] = tid
330
+
331
+ @WithTracing(root=True)
332
+ async def wrapped_engine(input_data, parameters, set_trace_id: Callable[[Optional[str]], None]):
333
+ trace_id_here = get_active_trace_id()
334
+ set_trace_id(trace_id_here)
335
+ result = call_my_code(input_data, parameters)
368
336
  # Handle async functions
369
- if hasattr(output, "__await__"):
370
- output = await output
371
- end = time.time() * 1000 # milliseconds
372
- duration = int(end - start)
373
-
374
- print(f"Output: {output}")
375
- # Score it
376
- result = Result(exampleId=example_id, scores={}, messages={}, errors={})
377
- for metric in metrics:
378
- metric_id = metric.get("id")
379
- if not metric_id:
380
- print(f"Warning: Metric missing 'id' field, skipping: {metric}")
381
- continue
382
- scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
383
- if scorer:
384
- metric_result = await scorer(input_data, output, metric)
385
- elif metric.get("type") == "llm":
386
- metric_result = await self._score_llm_metric(input_data, output, example, metric)
387
- else:
388
- metric_type = metric.get("type", "unknown")
389
- print(f"Skipping metric: {metric_id} {metric_type} - no scorer")
390
- continue
391
-
392
- # Handle None metric_result (e.g., if scoring failed)
393
- if not metric_result:
394
- print(f"Warning: Metric {metric_id} returned None result, skipping")
395
- result["errors"][metric_id] = "Scoring function returned None"
396
- continue
397
-
398
- result["scores"][metric_id] = metric_result.get("score")
399
- result["messages"][metric_id] = metric_result.get("message")
400
- result["errors"][metric_id] = metric_result.get("error")
401
- # Always add duration to scores as a system metric
402
- result["scores"]["duration"] = duration
403
-
404
- # Flush spans before scoreAndStore to ensure they're indexed in ES
405
- # This prevents race condition where scoreAndStore looks up spans before they're indexed
406
- await flush_tracing()
407
-
408
- print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
409
- result = await self.score_and_store(example, output, result)
410
- print(f"scoreAndStore returned: {result}")
411
- all_scores.append(result)
412
- finally:
413
- # Restore original env var values
414
- for key, original_value in original_env_vars.items():
415
- if original_value is None:
416
- # Variable didn't exist before, remove it
417
- os.environ.pop(key, None)
418
- else:
419
- # Restore original value
420
- os.environ[key] = original_value
421
-
422
- return all_scores
423
-
424
- def get_summary_results(self) -> Dict[str, Any]:
337
+ if hasattr(result, "__await__"):
338
+ result = await result
339
+ return result
340
+
341
+ output = wrapped_engine(input_data, parameters_here, set_trace_id)
342
+ if hasattr(output, "__await__"):
343
+ output = await output
344
+ duration = int((time.time() * 1000) - start)
345
+ print(f"Output: {output}")
346
+
347
+ dataset_metrics = self.get_dataset().get("metrics", [])
348
+ specific_metrics = example.get("metrics", [])
349
+ metrics = [*dataset_metrics, *specific_metrics]
350
+ result: Result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
351
+ for metric in metrics:
352
+ metric_id = metric.get("id")
353
+ score_key = _metric_score_key(metric)
354
+ if not metric_id or not score_key:
355
+ continue
356
+ scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
357
+ if scorer:
358
+ metric_result = await scorer(input_data, output, metric)
359
+ elif metric.get("type") == "llm":
360
+ metric_result = await self._score_llm_metric(input_data, output, example, metric)
361
+ else:
362
+ continue
363
+ if not metric_result:
364
+ result["errors"][score_key] = "Scoring function returned None"
365
+ continue
366
+ result["scores"][score_key] = metric_result.get("score")
367
+ result["messages"][score_key] = metric_result.get("message")
368
+ result["errors"][score_key] = metric_result.get("error")
369
+ result["scores"]["duration"] = duration
370
+ await flush_tracing()
371
+ print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
372
+ result = await self.score_and_store(example, output, result, trace_id=run_trace_id_ref[0])
373
+ print(f"scoreAndStore returned: {result}")
374
+ return [result]
375
+ finally:
376
+ for key, original_value in original_env_vars.items():
377
+ if original_value is None:
378
+ os.environ.pop(key, None)
379
+ else:
380
+ os.environ[key] = original_value
381
+
382
+ def get_summaries(self) -> Dict[str, Any]:
425
383
  """
426
- Get summary results from the experiment.
384
+ Get summaries from the experiment.
427
385
 
428
386
  Returns:
429
387
  Dictionary of metric names to summary statistics
@@ -435,12 +393,12 @@ class ExperimentRunner:
435
393
  f"{self.server_url}/experiment/{self.experiment_id}",
436
394
  headers=self._get_headers(),
437
395
  )
438
-
396
+
439
397
  if not response.ok:
440
398
  raise Exception(format_http_error(response, "fetch summary results"))
441
399
 
442
400
  experiment2 = response.json()
443
- return experiment2.get("summary_results", {})
401
+ return experiment2.get("summaries", {})
444
402
 
445
403
  async def _score_llm_metric(
446
404
  self,
@@ -471,7 +429,8 @@ class ExperimentRunner:
471
429
  model_id, self.server_url, self._get_headers()
472
430
  )
473
431
  if model_data:
474
- api_key = model_data.get("api_key")
432
+ # Server returns 'apiKey' (camelCase)
433
+ api_key = model_data.get("apiKey")
475
434
  # If provider not set in metric, try to get it from model
476
435
  if not provider and model_data.get("provider"):
477
436
  provider = model_data.get("provider")
aiqa/llm_as_judge.py CHANGED
@@ -52,14 +52,15 @@ async def get_model_from_server(
52
52
  try:
53
53
  def _do_request():
54
54
  return requests.get(
55
- f"{server_url}/model/{model_id}?fields=api_key",
55
+ f"{server_url}/model/{model_id}?fields=apiKey", # Server uses camelCase 'apiKey' (also accepts 'api_key')
56
56
  headers=headers,
57
57
  )
58
58
 
59
59
  response = await asyncio.to_thread(_do_request)
60
60
  if response.ok:
61
61
  model = response.json()
62
- if model.get("api_key"):
62
+ # Server returns 'apiKey' (camelCase)
63
+ if model.get("apiKey"):
63
64
  return model
64
65
  return None
65
66
  except Exception as e:
aiqa/object_serialiser.py CHANGED
@@ -25,7 +25,7 @@ def sanitize_string_for_utf8(text: str) -> str:
25
25
  Returns:
26
26
  A string with surrogate characters replaced by the Unicode replacement character (U+FFFD)
27
27
  """
28
- if text == None:
28
+ if text is None:
29
29
  return None
30
30
  if not isinstance(text, str): # paranoia
31
31
  text = str(text)
@@ -43,7 +43,10 @@ def toNumber(value: str|int|None) -> int:
43
43
  if value is None:
44
44
  return 0
45
45
  if isinstance(value, int):
46
- return value
46
+ return value
47
+ # Convert to string if not already
48
+ if not isinstance(value, str):
49
+ value = str(value)
47
50
  if value.endswith("b"): # drop the b
48
51
  value = value[:-1]
49
52
  if value.endswith("g"):
aiqa/tracing.py CHANGED
@@ -7,7 +7,7 @@ import inspect
7
7
  import fnmatch
8
8
  from typing import Any, Callable, Optional, List
9
9
  from functools import wraps
10
- from opentelemetry import trace
10
+ from opentelemetry import context as otel_context, trace
11
11
  from opentelemetry.trace import Status, StatusCode
12
12
 
13
13
  from .client import get_aiqa_client, get_component_tag, get_aiqa_tracer
@@ -47,16 +47,16 @@ class TracingOptions:
47
47
 
48
48
  ignore_input: Iterable of keys (e.g., list, set) to exclude from
49
49
  input data when recording span attributes. Applies after filter_input if both are set.
50
- Only applies when
51
- input is a dictionary. Supports simple wildcards (e.g., `"_*"`
52
- matches `"_apple"`, `"_fruit"`). For example, use `["password", "api_key"]`
53
- or `["_*", "password"]` to exclude sensitive fields from being traced.
50
+ Supports "self" and simple wildcards (e.g., `"_*"`
51
+ matches `"_apple"`, `"_fruit"`). The pattern `"_*"` is applied by default
52
+ to filter properties starting with '_' in nested objects.
54
53
 
55
54
  ignore_output: Iterable of keys (e.g., list, set) to exclude from
56
55
  output data when recording span attributes. Only applies when
57
56
  output is a dictionary. Supports simple wildcards (e.g., `"_*"`
58
- matches `"_apple"`, `"_fruit"`). Useful for excluding large or sensitive
59
- fields from traces.
57
+ matches `"_apple"`, `"_fruit"`). The pattern `"_*"` is applied by default
58
+ to filter properties starting with '_' in nested objects. Useful for excluding
59
+ large or sensitive fields from traces.
60
60
 
61
61
  filter_input: Callable function that receives the same arguments as the
62
62
  decorated function (*args, **kwargs) and returns a filtered/transformed
@@ -96,7 +96,7 @@ class TracingOptions:
96
96
  filter_input=lambda self, example: {
97
97
  "dataset": self.dataset_id,
98
98
  "experiment": self.experiment_id,
99
- "example_id": example.id if hasattr(example, 'id') else None
99
+ "example": example.id if hasattr(example, 'id') else None
100
100
  }
101
101
  )
102
102
  def run_example(self, example):
@@ -168,33 +168,89 @@ def _prepare_input(args: tuple, kwargs: dict, sig: Optional[inspect.Signature] =
168
168
  return result
169
169
 
170
170
 
171
- def _apply_ignore_patterns(data_dict: dict, ignore_patterns: Optional[List[str]]) -> dict:
171
+ def _apply_ignore_patterns(
172
+ data_dict: dict,
173
+ ignore_patterns: Optional[List[str]],
174
+ recursive: bool = True,
175
+ max_depth: int = 100,
176
+ current_depth: int = 0
177
+ ) -> dict:
172
178
  """
173
- Apply ignore patterns to a dict.
179
+ Apply ignore patterns to a dict, optionally recursively.
174
180
  Supports string keys, wildcard patterns (*), and list of patterns.
175
181
  Used for both ignore_input and ignore_output.
176
182
 
177
183
  Args:
178
- data_dict: Dictionary to filter
184
+ data_dict: Dictionary to filter (may contain nested dictionaries)
179
185
  ignore_patterns: List of patterns to exclude (e.g., ["self", "_*", "password"])
186
+ recursive: Whether to apply patterns recursively to nested dictionaries
187
+ max_depth: Maximum recursion depth to prevent infinite loops (default: 100)
188
+ current_depth: Current recursion depth (internal use)
180
189
 
181
190
  Returns:
182
191
  Filtered dictionary with matching keys removed
183
192
  """
184
- if not ignore_patterns or not isinstance(data_dict, dict):
193
+ if not isinstance(data_dict, dict):
185
194
  return data_dict
186
195
 
187
- result = data_dict.copy()
188
- keys_to_delete = [
189
- key for key in result.keys()
190
- if _matches_ignore_pattern(key, ignore_patterns)
191
- ]
192
- for key in keys_to_delete:
193
- del result[key]
196
+ # Safety check: prevent infinite loops from extremely deep nesting
197
+ if current_depth >= max_depth:
198
+ logger.warning(
199
+ f"_apply_ignore_patterns: max depth {max_depth} reached, "
200
+ f"stopping recursion to prevent infinite loop"
201
+ )
202
+ return data_dict
203
+
204
+ # If no patterns, return copy (no filtering needed, even if recursive=True)
205
+ if not ignore_patterns:
206
+ return data_dict.copy()
207
+
208
+ result = {}
209
+ for key, value in data_dict.items():
210
+ # Skip keys that match ignore patterns
211
+ if _matches_ignore_pattern(key, ignore_patterns):
212
+ continue
213
+
214
+ # Recursively process nested dictionaries if recursive=True
215
+ if recursive and isinstance(value, dict):
216
+ result[key] = _apply_ignore_patterns(
217
+ value, ignore_patterns, recursive, max_depth, current_depth + 1
218
+ )
219
+ else:
220
+ result[key] = value
194
221
 
195
222
  return result
196
223
 
197
224
 
225
+ def _merge_with_default_ignore_patterns(
226
+ ignore_patterns: Optional[List[str]],
227
+ client: Optional[Any] = None
228
+ ) -> List[str]:
229
+ """
230
+ Merge user-provided ignore patterns with client's default ignore patterns.
231
+
232
+ Args:
233
+ ignore_patterns: Optional list of user-provided patterns
234
+ client: Optional client instance (to avoid repeated get_aiqa_client() calls)
235
+
236
+ Returns:
237
+ List of patterns including client's default ignore patterns
238
+ """
239
+ if client is None:
240
+ client = get_aiqa_client()
241
+ default_patterns = client.default_ignore_patterns
242
+
243
+ if ignore_patterns is None:
244
+ return default_patterns.copy() if default_patterns else []
245
+
246
+ # Merge patterns, avoiding duplicates
247
+ merged = list(default_patterns)
248
+ for pattern in ignore_patterns:
249
+ if pattern not in merged:
250
+ merged.append(pattern)
251
+ return merged
252
+
253
+
198
254
  def _prepare_and_filter_input(
199
255
  args: tuple,
200
256
  kwargs: dict,
@@ -209,6 +265,7 @@ def _prepare_and_filter_input(
209
265
  1. Apply filter_input to args, kwargs (receives same inputs as decorated function, including self)
210
266
  2. Convert into dict ready for span.attributes.input
211
267
  3. Apply ignore_input to the dict (supports string, wildcard, and list patterns)
268
+ Client's default ignore patterns are automatically merged with ignore_input.
212
269
 
213
270
  Args:
214
271
  args: Positional arguments (including self for bound methods)
@@ -218,7 +275,7 @@ def _prepare_and_filter_input(
218
275
  including `self` for bound methods. This allows extracting properties from any object.
219
276
  ignore_input: Optional list of keys/patterns to exclude from the final dict.
220
277
  If "self" is in ignore_input, it will be removed from the final dict but filter_input
221
- still receives it.
278
+ still receives it. Client's default ignore patterns are automatically merged.
222
279
  sig: Optional function signature for proper arg name resolution
223
280
 
224
281
  Returns:
@@ -251,15 +308,23 @@ def _prepare_and_filter_input(
251
308
  input_data = _prepare_input(args, kwargs, sig)
252
309
 
253
310
  # Step 3: Apply ignore_input to the dict (removes "self" from final dict if specified)
254
- should_ignore_self = ignore_input and "self" in ignore_input
311
+ # Merge with client's default ignore patterns
312
+ client = get_aiqa_client()
313
+ merged_ignore_input = _merge_with_default_ignore_patterns(ignore_input, client)
314
+ should_ignore_self = "self" in merged_ignore_input
315
+
255
316
  if isinstance(input_data, dict):
256
- input_data = _apply_ignore_patterns(input_data, ignore_input)
317
+ input_data = _apply_ignore_patterns(
318
+ input_data,
319
+ merged_ignore_input,
320
+ recursive=client.ignore_recursive
321
+ )
257
322
  # Handle case where we removed self and there are no remaining args/kwargs
258
323
  if should_ignore_self and not input_data:
259
324
  return None
260
- elif ignore_input:
261
- # Warn if ignore_input is set but input_data is not a dict
262
- logger.warning(f"_prepare_and_filter_input: skip: ignore_input is set but input_data is not a dict: {type(input_data)}")
325
+ elif merged_ignore_input:
326
+ # Warn if ignore patterns are set but input_data is not a dict
327
+ logger.warning(f"_prepare_and_filter_input: skip: ignore patterns are set but input_data is not a dict: {type(input_data)}")
263
328
 
264
329
  return input_data
265
330
 
@@ -269,7 +334,10 @@ def _filter_and_serialize_output(
269
334
  filter_output: Optional[Callable[[Any], Any]],
270
335
  ignore_output: Optional[List[str]],
271
336
  ) -> Any:
272
- """Filter and serialize output for span attributes."""
337
+ """
338
+ Filter and serialize output for span attributes.
339
+ Client's default ignore patterns are automatically merged with ignore_output.
340
+ """
273
341
  output_data = result
274
342
  if filter_output:
275
343
  if isinstance(output_data, dict):
@@ -277,11 +345,19 @@ def _filter_and_serialize_output(
277
345
  output_data = filter_output(output_data)
278
346
 
279
347
  # Apply ignore_output patterns (supports key, wildcard, and list patterns)
348
+ # Merge with client's default ignore patterns
349
+ client = get_aiqa_client()
350
+ merged_ignore_output = _merge_with_default_ignore_patterns(ignore_output, client)
351
+
280
352
  if isinstance(output_data, dict):
281
- output_data = _apply_ignore_patterns(output_data, ignore_output)
282
- elif ignore_output:
283
- # Warn if ignore_output is set but output_data is not a dict
284
- logger.warning(f"_filter_and_serialize_output: skip: ignore_output is set but output_data is not a dict: {type(output_data)}")
353
+ output_data = _apply_ignore_patterns(
354
+ output_data,
355
+ merged_ignore_output,
356
+ recursive=client.ignore_recursive
357
+ )
358
+ elif merged_ignore_output:
359
+ # Warn if ignore patterns are set but output_data is not a dict
360
+ logger.warning(f"_filter_and_serialize_output: skip: ignore patterns are set but output_data is not a dict: {type(output_data)}")
285
361
 
286
362
  # Serialize immediately to create immutable result (removes mutable structures)
287
363
  return serialize_for_span(output_data)
@@ -487,6 +563,7 @@ def WithTracing(
487
563
  ignore_output: Optional[List[str]] = None,
488
564
  filter_input: Optional[Callable[[Any], Any]] = None,
489
565
  filter_output: Optional[Callable[[Any], Any]] = None,
566
+ root: bool = False,
490
567
  ):
491
568
  """
492
569
  Decorator to automatically create spans for function calls.
@@ -500,12 +577,14 @@ def WithTracing(
500
577
  ignore_input: List of keys to exclude from input data when recording span attributes.
501
578
  self is handled as "self"
502
579
  Supports simple wildcards (e.g., "_*"
503
- matches "_apple", "_fruit"). For example, use ["password", "api_key"] or
504
- ["_*", "password"] to exclude sensitive fields from being traced.
580
+ matches "_apple", "_fruit"). The pattern "_*" is applied by default
581
+ to filter properties starting with '_' in nested objects. For example, use
582
+ ["password", "api_key"] to exclude additional sensitive fields from being traced.
505
583
  ignore_output: List of keys to exclude from output data when recording span attributes.
506
584
  Only applies when output is a dictionary. Supports simple wildcards (e.g., "_*"
507
- matches "_apple", "_fruit"). Useful for excluding large or sensitive
508
- fields from traces.
585
+ matches "_apple", "_fruit"). The pattern "_*" is applied by default
586
+ to filter properties starting with '_' in nested objects. Useful for excluding
587
+ large or sensitive fields from traces.
509
588
  filter_input: Function to filter/transform input before recording.
510
589
  Receives the same arguments as the decorated function (*args, **kwargs),
511
590
  including `self` for bound methods. This allows you to extract specific
@@ -514,6 +593,7 @@ def WithTracing(
514
593
  Returns a dict or any value (will be converted to dict). Applied before ignore_input.
515
594
  filter_output: Function to filter/transform output before recording.
516
595
  Receives the output value and returns a filtered/transformed version.
596
+ root: Whether this is a root span. If True, the span will not be linked to any parent spans.
517
597
 
518
598
  Example:
519
599
  @WithTracing
@@ -607,7 +687,8 @@ def WithTracing(
607
687
  return executor()
608
688
  # Get tracer after initialization (lazy)
609
689
  tracer = get_aiqa_tracer()
610
- with tracer.start_as_current_span(fn_name) as span:
690
+ span_kw = {"context": otel_context.Context()} if root else {}
691
+ with tracer.start_as_current_span(fn_name, **span_kw) as span:
611
692
  if not _setup_span(span, input_data):
612
693
  return executor() # span is not recording, so just execute the function and return the result
613
694
  try:
@@ -628,7 +709,8 @@ def WithTracing(
628
709
 
629
710
  # Get tracer after initialization (lazy)
630
711
  tracer = get_aiqa_tracer()
631
- with tracer.start_as_current_span(fn_name) as span:
712
+ span_kw = {"context": otel_context.Context()} if root else {}
713
+ with tracer.start_as_current_span(fn_name, **span_kw) as span:
632
714
  if not _setup_span(span, input_data):
633
715
  return await executor()
634
716
 
@@ -654,7 +736,8 @@ def WithTracing(
654
736
  # Get tracer after initialization (lazy)
655
737
  tracer = get_aiqa_tracer()
656
738
  # Create span but don't use 'with' - span will be closed by TracedGenerator
657
- span = tracer.start_span(fn_name)
739
+ span_kw = {"context": otel_context.Context()} if root else {}
740
+ span = tracer.start_span(fn_name, **span_kw)
658
741
  token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
659
742
 
660
743
  try:
@@ -678,12 +761,14 @@ def WithTracing(
678
761
  # This is called lazily when the function runs, not at decorator definition time
679
762
  client = get_aiqa_client()
680
763
  if not client.enabled:
681
- return await executor()
764
+ # executor() returns an async generator object, not a coroutine, so don't await it
765
+ return executor()
682
766
 
683
767
  # Get tracer after initialization (lazy)
684
768
  tracer = get_aiqa_tracer()
685
769
  # Create span but don't use 'with' - span will be closed by TracedAsyncGenerator
686
- span = tracer.start_span(fn_name)
770
+ span_kw = {"context": otel_context.Context()} if root else {}
771
+ span = tracer.start_span(fn_name, **span_kw)
687
772
  token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
688
773
 
689
774
  try:
aiqa/types.py CHANGED
@@ -29,7 +29,7 @@ class MetricResult(TypedDict):
29
29
 
30
30
  class Result(TypedDict):
31
31
  """Result of evaluating a set of metrics on an output (i.e. the full set of metrics for a single example)."""
32
- exampleId: str
32
+ example: str
33
33
  scores: Dict[str, Number]
34
34
  messages: Optional[Dict[str, str]] = None
35
35
  errors: Optional[Dict[str, str]] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiqa-client
3
- Version: 0.6.1
3
+ Version: 0.7.2
4
4
  Summary: OpenTelemetry-based Python client for tracing functions and sending traces to the AIQA server
5
5
  Author-email: AIQA <info@aiqa.dev>
6
6
  License: MIT
@@ -0,0 +1,17 @@
1
+ aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
2
+ aiqa/client.py,sha256=zS9OQQhdvVeIoBz0o8qrz-rjXngEbS9Lrli2ZWNIsrM,15993
3
+ aiqa/constants.py,sha256=if54R1OD111iPvB53mw0U9NRrBV-zvvm1gOAVxRj-vE,226
4
+ aiqa/experiment_runner.py,sha256=YpUOoBS_3DvT_ipofWe8MnrSjmWz4Bmfe8yaErdprBA,17730
5
+ aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
6
+ aiqa/llm_as_judge.py,sha256=ESmqQfaYpypCNfsODkdn5s85n_nzJ4WKbhUMVTb2djE,10087
7
+ aiqa/object_serialiser.py,sha256=mzd2U_mFcAPalN2m9wxq35-BBeRJOhNK1k0-BmRSfQM,17055
8
+ aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
10
+ aiqa/tracing.py,sha256=XKYUwZUIkybxIkOKMj5xwVV2IwX6QTvRIzsZOv8jUOc,35771
11
+ aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
12
+ aiqa/types.py,sha256=Rv27oC1R0P1soJz5wsdwkVW-jfHQEVi4vUhwRJid270,2529
13
+ aiqa_client-0.7.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
14
+ aiqa_client-0.7.2.dist-info/METADATA,sha256=Q4Wwu_FqNSB7IRdydBcRFDcL2bHNLyStT6DYkc_aS8E,7705
15
+ aiqa_client-0.7.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ aiqa_client-0.7.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
17
+ aiqa_client-0.7.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,17 +0,0 @@
1
- aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
2
- aiqa/client.py,sha256=pChJBbaPwd6flVaucEt1oZDoSSlLQWJpBLcqruLuM60,13296
3
- aiqa/constants.py,sha256=Xq8425ozX9T4d_gmSYvWjEqPIdNlrasuymmImetC3rs,226
4
- aiqa/experiment_runner.py,sha256=FVhAtvjV5_jAmPCq55Xl2TNwXV5YAIrv2OFaV3wbjbs,19426
5
- aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
6
- aiqa/llm_as_judge.py,sha256=tJlYX6qZaqhZEC-3wvSk7btb4SMk1O1avDtujj9lHj4,9980
7
- aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
8
- aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
10
- aiqa/tracing.py,sha256=juRFgt-uR5Z726F3pOZcxgcnZWom-sZ_-fYrhPHbFP4,32159
11
- aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
12
- aiqa/types.py,sha256=E1-IPJNbH9A4TPUT0bXZDIT6SHwHQSolzOM4j9NXR5E,2531
13
- aiqa_client-0.6.1.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
14
- aiqa_client-0.6.1.dist-info/METADATA,sha256=n76Zntwmd9tIP3aXQKYuGaUJQkJr37GmF-Vn_EmToCg,7705
15
- aiqa_client-0.6.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
16
- aiqa_client-0.6.1.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
17
- aiqa_client-0.6.1.dist-info/RECORD,,