PyPI - aiqa-client - Versions diffs - 0.6.1__py3-none-any.whl → 0.7.2__py3-none-any.whl - Mend

aiqa-client 0.6.1__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

aiqa/client.py +74 -4
aiqa/constants.py +1 -1
aiqa/experiment_runner.py +108 -149
aiqa/llm_as_judge.py +3 -2
aiqa/object_serialiser.py +5 -2
aiqa/tracing.py +124 -39
aiqa/types.py +1 -1
{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/METADATA +1 -1
aiqa_client-0.7.2.dist-info/RECORD +17 -0
{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/WHEEL +1 -1
aiqa_client-0.6.1.dist-info/RECORD +0 -17
{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/licenses/LICENSE.txt +0 -0
{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/top_level.txt +0 -0

aiqa/client.py CHANGED Viewed

@@ -2,10 +2,10 @@
 import os
 import logging
 from functools import lru_cache
-from typing import Optional, TYPE_CHECKING, Any, Dict
+from typing import Optional, TYPE_CHECKING, Any, Dict, List
 from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult, SpanExporter as SpanExporterBase
+from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
 from opentelemetry.sdk.trace import ReadableSpan
 from opentelemetry.trace import SpanContext
@@ -52,6 +52,8 @@ class AIQAClient:
             cls._instance._exporter = None # reduce circular import issues by not importing for typecheck here
             cls._instance._enabled: bool = True
             cls._instance._initialized: bool = False
+            cls._instance._default_ignore_patterns: List[str] = ["_*"]  # Default: filter properties starting with '_'
+            cls._instance._ignore_recursive: bool = True  # Default: recursive filtering enabled
         return cls._instance
     @property
@@ -90,6 +92,76 @@ class AIQAClient:
         logger.info(f"AIQA tracing {'enabled' if value else 'disabled'}")
         self._enabled = value
+    @property
+    def default_ignore_patterns(self) -> List[str]:
+        """
+        Get the default ignore patterns applied to all traced inputs and outputs.
+        Default: ["_*"] (filters properties starting with '_')
+        Returns:
+            List of ignore patterns (supports wildcards like "_*")
+        """
+        return self._default_ignore_patterns.copy()
+    @default_ignore_patterns.setter
+    def default_ignore_patterns(self, value: Optional[List[str]]) -> None:
+        """
+        Set the default ignore patterns applied to all traced inputs and outputs.
+        Args:
+            value: List of patterns to ignore (e.g., ["_*", "password"]).
+                   Set to None or [] to disable default ignore patterns.
+                   Supports wildcards (e.g., "_*" matches "_apple", "_fruit").
+        Example:
+            from aiqa import get_aiqa_client
+            client = get_aiqa_client()
+            # Add password to default ignore patterns
+            client.default_ignore_patterns = ["_*", "password", "api_key"]
+            # Disable default ignore patterns
+            client.default_ignore_patterns = []
+        """
+        if value is None:
+            self._default_ignore_patterns = []
+        else:
+            self._default_ignore_patterns = list(value)
+        logger.info(f"Default ignore patterns set to: {self._default_ignore_patterns}")
+    @property
+    def ignore_recursive(self) -> bool:
+        """
+        Get whether ignore patterns are applied recursively to nested objects.
+        Default: True (recursive filtering enabled)
+        Returns:
+            True if recursive filtering is enabled, False otherwise
+        """
+        return self._ignore_recursive
+    @ignore_recursive.setter
+    def ignore_recursive(self, value: bool) -> None:
+        """
+        Set whether ignore patterns are applied recursively to nested objects.
+        When True (default), ignore patterns are applied at all nesting levels.
+        When False, ignore patterns are only applied to top-level keys.
+        Args:
+            value: True to enable recursive filtering, False to disable
+        Example:
+            from aiqa import get_aiqa_client
+            client = get_aiqa_client()
+            # Disable recursive filtering (only filter top-level keys)
+            client.ignore_recursive = False
+        """
+        self._ignore_recursive = bool(value)
+        logger.info(f"Ignore recursive filtering {'enabled' if self._ignore_recursive else 'disabled'}")
     def shutdown(self) -> None:
         """
         Shutdown the tracer provider and exporter.
@@ -245,8 +317,6 @@ def _attach_aiqa_processor(provider: TracerProvider) -> None:
         auth_headers = {}
         if api_key:
             auth_headers["Authorization"] = f"ApiKey {api_key}"
-        elif os.getenv("AIQA_API_KEY"):
-            auth_headers["Authorization"] = f"ApiKey {os.getenv('AIQA_API_KEY')}"
         # OTLP HTTP exporter requires the full endpoint URL including /v1/traces
         # Ensure server_url doesn't have trailing slash or /v1/traces, then append /v1/traces

aiqa/constants.py CHANGED Viewed

@@ -3,6 +3,6 @@ Constants used across the AIQA client package.
 """
 AIQA_TRACER_NAME = "aiqa-tracer"
-VERSION = "0.6.1" # automatically updated by set-version-json.sh
+VERSION = "0.7.2" # automatically updated by set-version-json.sh
 LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages

aiqa/experiment_runner.py CHANGED Viewed

@@ -5,11 +5,16 @@ ExperimentRunner - runs experiments on datasets and scores results
 import os
 import time
 import asyncio
+from opentelemetry import context as otel_context
+from opentelemetry.trace import Status, StatusCode, set_span_in_context
 from .constants import LOG_TAG
 from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
 from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
 from .tracing import WithTracing
-from .span_helpers import set_span_attribute, flush_tracing
+from .span_helpers import set_span_attribute, flush_tracing, get_active_trace_id
+from .client import get_aiqa_client, get_aiqa_tracer, get_component_tag
+from .object_serialiser import serialize_for_span
+from .tracing_llm_utils import _extract_and_set_token_usage, _extract_and_set_provider_and_model
 from .llm_as_judge import score_llm_metric_local, get_model_from_server, call_llm_fallback
 import requests
 from .types import MetricResult, ScoreThisInputOutputMetricType, Example, Result, Metric, CallLLMType
@@ -25,31 +30,9 @@ CallMyCodeType = Callable[[Any, Dict[str, Any]], Union[Any, Awaitable[Any]]]
 ScoreThisOutputType = Callable[[Any, Any, Dict[str, Any], Dict[str, Any]], Awaitable[Dict[str, Any]]]
-def _filter_input_for_run(input_data: Any) -> Dict[str, Any]:
-    """Tracing:Filter input - drop most, keep just ids"""
-    if not isinstance(input_data, dict):
-        return {}
-    self_obj = input_data.get("self")
-    if not self_obj:
-        return {}
-    return {
-        "dataset": getattr(self_obj, "dataset_id", None),
-        "experiment": getattr(self_obj, "experiment_id", None),
-    }
-def _filter_input_for_run_example(
-    self: "ExperimentRunner",
-    example: Dict[str, Any],
-    call_my_code: Any = None,
-    score_this_output: Any = None,
-) -> Dict[str, Any]:
-    """Filter input for run_example method to extract dataset, experiment, and example IDs."""
-    result = _filter_input_for_run({"self": self})
-    if isinstance(example, dict):
-        result["example"] = example.get("id")
-    return result
+def _metric_score_key(metric: Dict[str, Any]) -> str:
+    """Key for scores in API: server expects metric name (fallback to id)."""
+    return (metric.get("name") or metric.get("id")) or ""
 class ExperimentRunner:
@@ -123,7 +106,17 @@ class ExperimentRunner:
         return dataset
-    def get_example_inputs(self, limit: int = 10000) -> List[Dict[str, Any]]:
+    def get_example(self, example_id: str) -> Dict[str, Any]:
+        """
+        Fetch an example by ID.
+        """
+        response = requests.get(
+            f"{self.server_url}/example/{example_id}",
+            headers=self._get_headers(),
+        )
+        return response.json()
+    def get_examples_for_dataset(self, limit: int = 10000) -> List[Dict[str, Any]]:
         """
         Fetch example inputs from the dataset.
@@ -134,7 +127,7 @@ class ExperimentRunner:
             List of example objects
         """
         params = {
-            "dataset_id": self.dataset_id,
+            "dataset": self.dataset_id,
             "limit": str(limit),
         }
         if self.organisation:
@@ -162,7 +155,6 @@ class ExperimentRunner:
             experiment_setup: Optional setup for the experiment object. You may wish to set:
                 - name (recommended for labelling the experiment)
                 - parameters
-                - comparison_parameters
         Returns:
             The created experiment object
@@ -184,7 +176,7 @@ class ExperimentRunner:
             "organisation": self.organisation,
             "dataset": self.dataset_id,
             "results": [],
-            "summary_results": {},
+            "summaries": {},
         }
         print(f"Creating experiment")
@@ -207,6 +199,7 @@ class ExperimentRunner:
         example: Example,
         output: Any,
         result: Result,
+        trace_id: Optional[str] = None,
     ) -> Result:
         """
         Ask the server to score an example result. Stores the score for later summary calculation.
@@ -226,24 +219,20 @@ class ExperimentRunner:
         if not example_id:
             raise ValueError("Example must have an 'id' field")
         if result is None:
-            example_id = example.get("id")
-            if not example_id:
-                raise ValueError("Example must have an 'id' field")
-            result = Result(exampleId=example_id, scores={}, messages={}, errors={})
+            result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
         scores = result.get("scores") or {}
         print(f"Scoring and storing example: {example_id}")
         print(f"Scores: {scores}")
         # Run synchronous requests.post in a thread pool to avoid blocking
+        # Server expects output = raw output to score, not the result dict; scores keyed by metric name
         def _do_request():
             return requests.post(
                 f"{self.server_url}/experiment/{self.experiment_id}/example/{example_id}/scoreAndStore",
                 json={
-                    "output": result,
-                    "traceId": example.get("traceId"),
+                    "output": output,
+                    "trace": trace_id,
                     "scores": scores,
                 },
                 headers=self._get_headers(),
@@ -258,7 +247,6 @@ class ExperimentRunner:
         print(f"scoreAndStore response: {json_result}")
         return json_result
-    @WithTracing(filter_input=_filter_input_for_run)
     async def run(
         self,
         call_my_code: CallMyCodeType,
@@ -271,19 +259,11 @@ class ExperimentRunner:
             engine: Function that takes input, returns output (can be async)
             scorer: Optional function that scores the output given the example
         """
-        examples = self.get_example_inputs()
-        # Wrap engine to match run_example signature (input, parameters)
-        async def wrapped_engine(input_data, parameters):
-            result = call_my_code(input_data, parameters)
-            # Handle async functions
-            if hasattr(result, "__await__"):
-                result = await result
-            return result
+        examples = self.get_examples_for_dataset()
         for example in examples:
             try:
-                scores = await self.run_example(example, wrapped_engine, scorer_for_metric_id)
+                scores = await self.run_example(example, call_my_code, scorer_for_metric_id)
                 if scores:
                     self.scores.append(
                         {
@@ -296,7 +276,6 @@ class ExperimentRunner:
                 print(f"Error processing example {example.get('id', 'unknown')}: {e}")
                 # Continue with next example instead of failing entire run
-    @WithTracing(filter_input=_filter_input_for_run_example)
     async def run_example(
         self,
         example: Example,
@@ -304,8 +283,10 @@ class ExperimentRunner:
         scorer_for_metric_id: Optional[Dict[str, ScoreThisInputOutputMetricType]] = None,
     ) -> List[Result]:
         """
-        Run the engine on an example with the given parameters (looping over comparison parameters),
-        and score the result. Also calls scoreAndStore to store the result in the server.
+        Run the engine on an example with the experiment's parameters, score the result, and store it.
+        Spans: one root "RunExample" span (input, call_my_code, output) and one child "ScoreExample"
+        span for scoring, so the server sees a clear call_my_code vs scoring split (aligned with client-go).
         Args:
             example: The example to run. See Example.ts type
@@ -313,117 +294,94 @@ class ExperimentRunner:
             scorer_for_metric_id: Optional dictionary of metric IDs to functions that score the output given the example and parameters
         Returns:
-            One set of scores for each comparison parameter set. If no comparison parameters,
-            returns an array of one.
+            List of one result (for API compatibility).
         """
-        # Ensure experiment exists
         if not self.experiment:
             self.create_experiment()
         if not self.experiment:
             raise Exception("Failed to create experiment")
-        # Make the parameters
-        parameters_fixed = self.experiment.get("parameters") or {}
-        # If comparison_parameters is empty/undefined, default to [{}] so we run at least once
-        parameters_loop = self.experiment.get("comparison_parameters") or [{}]
-        # Handle both spans array and input field
+        parameters_here = self.experiment.get("parameters") or {}
         input_data = example.get("input")
         if not input_data and example.get("spans") and len(example["spans"]) > 0:
             input_data = example["spans"][0].get("attributes", {}).get("input")
         if not input_data:
-            print(f"Warning: Example has no input field or spans with input attribute: {example}"
-            )
-            # Run engine anyway -- this could make sense if it's all about the parameters
+            print(f"Warning: Example has no input field or spans with input attribute: {example}")
-        # Set example.id on the root span (created by @WithTracing decorator)
-        # This ensures the root span from the trace has example=Example.id set
         example_id = example.get("id")
         if not example_id:
             raise ValueError("Example must have an 'id' field")
-        set_span_attribute("example", example_id)
-        all_scores: List[Dict[str, Any]] = []
-        dataset_metrics = self.get_dataset().get("metrics", [])
-        specific_metrics = example.get("metrics", [])
-        metrics = [*dataset_metrics, *specific_metrics]
-        # This loop should not be parallelized - it should run sequentially, one after the other
-        # to avoid creating interference between the runs.
-        for parameters in parameters_loop:
-            parameters_here = {**parameters_fixed, **parameters}
-            print(f"Running with parameters: {parameters_here}")
-            # Save original env var values for cleanup
-            original_env_vars: Dict[str, Optional[str]] = {}
-            # Set env vars from parameters_here
-            for key, value in parameters_here.items():
-                if value:
-                    original_env_vars[key] = os.environ.get(key)
-                    os.environ[key] = str(value)
-            try:
-                start = time.time() * 1000  # milliseconds
-                output = call_my_code(input_data, parameters_here)
+        print(f"Running with parameters: {parameters_here}")
+        original_env_vars: Dict[str, Optional[str]] = {}
+        for key, value in parameters_here.items():
+            if value:
+                original_env_vars[key] = os.environ.get(key)
+                os.environ[key] = str(value)
+        try:
+            start = time.time() * 1000
+            run_trace_id_ref: List[Optional[str]] = [None]
+            # Wrap engine to match run_example signature (input, parameters)
+            # Root span so server can find it by parent:unset; trace ID is sent to scoreAndStore
+            def set_trace_id(tid: Optional[str]) -> None:
+                run_trace_id_ref[0] = tid
+            @WithTracing(root=True)
+            async def wrapped_engine(input_data, parameters, set_trace_id: Callable[[Optional[str]], None]):
+                trace_id_here = get_active_trace_id()
+                set_trace_id(trace_id_here)
+                result = call_my_code(input_data, parameters)
                 # Handle async functions
-                if hasattr(output, "__await__"):
-                    output = await output
-                end = time.time() * 1000  # milliseconds
-                duration = int(end - start)
-                print(f"Output: {output}")
-                # Score it
-                result = Result(exampleId=example_id, scores={}, messages={}, errors={})
-                for metric in metrics:
-                    metric_id = metric.get("id")
-                    if not metric_id:
-                        print(f"Warning: Metric missing 'id' field, skipping: {metric}")
-                        continue
-                    scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
-                    if scorer:
-                        metric_result = await scorer(input_data, output, metric)
-                    elif metric.get("type") == "llm":
-                        metric_result = await self._score_llm_metric(input_data, output, example, metric)
-                    else:
-                        metric_type = metric.get("type", "unknown")
-                        print(f"Skipping metric: {metric_id} {metric_type} - no scorer")
-                        continue
-                    # Handle None metric_result (e.g., if scoring failed)
-                    if not metric_result:
-                        print(f"Warning: Metric {metric_id} returned None result, skipping")
-                        result["errors"][metric_id] = "Scoring function returned None"
-                        continue
-                    result["scores"][metric_id] = metric_result.get("score")
-                    result["messages"][metric_id] = metric_result.get("message")
-                    result["errors"][metric_id] = metric_result.get("error")
-                # Always add duration to scores as a system metric
-                result["scores"]["duration"] = duration
-                # Flush spans before scoreAndStore to ensure they're indexed in ES
-                # This prevents race condition where scoreAndStore looks up spans before they're indexed
-                await flush_tracing()
-                print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
-                result = await self.score_and_store(example, output, result)
-                print(f"scoreAndStore returned: {result}")
-                all_scores.append(result)
-            finally:
-                # Restore original env var values
-                for key, original_value in original_env_vars.items():
-                    if original_value is None:
-                        # Variable didn't exist before, remove it
-                        os.environ.pop(key, None)
-                    else:
-                        # Restore original value
-                        os.environ[key] = original_value
-        return all_scores
-    def get_summary_results(self) -> Dict[str, Any]:
+                if hasattr(result, "__await__"):
+                    result = await result
+                return result
+            output = wrapped_engine(input_data, parameters_here, set_trace_id)
+            if hasattr(output, "__await__"):
+                output = await output
+            duration = int((time.time() * 1000) - start)
+            print(f"Output: {output}")
+            dataset_metrics = self.get_dataset().get("metrics", [])
+            specific_metrics = example.get("metrics", [])
+            metrics = [*dataset_metrics, *specific_metrics]
+            result: Result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
+            for metric in metrics:
+                metric_id = metric.get("id")
+                score_key = _metric_score_key(metric)
+                if not metric_id or not score_key:
+                    continue
+                scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
+                if scorer:
+                    metric_result = await scorer(input_data, output, metric)
+                elif metric.get("type") == "llm":
+                    metric_result = await self._score_llm_metric(input_data, output, example, metric)
+                else:
+                    continue
+                if not metric_result:
+                    result["errors"][score_key] = "Scoring function returned None"
+                    continue
+                result["scores"][score_key] = metric_result.get("score")
+                result["messages"][score_key] = metric_result.get("message")
+                result["errors"][score_key] = metric_result.get("error")
+            result["scores"]["duration"] = duration
+            await flush_tracing()
+            print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
+            result = await self.score_and_store(example, output, result, trace_id=run_trace_id_ref[0])
+            print(f"scoreAndStore returned: {result}")
+            return [result]
+        finally:
+            for key, original_value in original_env_vars.items():
+                if original_value is None:
+                    os.environ.pop(key, None)
+                else:
+                    os.environ[key] = original_value
+    def get_summaries(self) -> Dict[str, Any]:
         """
-        Get summary results from the experiment.
+        Get summaries from the experiment.
         Returns:
             Dictionary of metric names to summary statistics
@@ -435,12 +393,12 @@ class ExperimentRunner:
             f"{self.server_url}/experiment/{self.experiment_id}",
             headers=self._get_headers(),
         )
         if not response.ok:
             raise Exception(format_http_error(response, "fetch summary results"))
         experiment2 = response.json()
-        return experiment2.get("summary_results", {})
+        return experiment2.get("summaries", {})
     async def _score_llm_metric(
         self,
@@ -471,7 +429,8 @@ class ExperimentRunner:
                 model_id, self.server_url, self._get_headers()
             )
             if model_data:
-                api_key = model_data.get("api_key")
+                # Server returns 'apiKey' (camelCase)
+                api_key = model_data.get("apiKey")
                 # If provider not set in metric, try to get it from model
                 if not provider and model_data.get("provider"):
                     provider = model_data.get("provider")

aiqa/llm_as_judge.py CHANGED Viewed

@@ -52,14 +52,15 @@ async def get_model_from_server(
     try:
         def _do_request():
             return requests.get(
-                f"{server_url}/model/{model_id}?fields=api_key",
+                f"{server_url}/model/{model_id}?fields=apiKey",  # Server uses camelCase 'apiKey' (also accepts 'api_key')
                 headers=headers,
             )
         response = await asyncio.to_thread(_do_request)
         if response.ok:
             model = response.json()
-            if model.get("api_key"):
+            # Server returns 'apiKey' (camelCase)
+            if model.get("apiKey"):
                 return model
         return None
     except Exception as e:

aiqa/object_serialiser.py CHANGED Viewed

@@ -25,7 +25,7 @@ def sanitize_string_for_utf8(text: str) -> str:
     Returns:
         A string with surrogate characters replaced by the Unicode replacement character (U+FFFD)
     """
-    if text == None:
+    if text is None:
         return None
     if not isinstance(text, str): # paranoia
         text = str(text)
@@ -43,7 +43,10 @@ def toNumber(value: str|int|None) -> int:
     if value is None:
         return 0
     if isinstance(value, int):
-        return value
+        return value
+    # Convert to string if not already
+    if not isinstance(value, str):
+        value = str(value)
     if value.endswith("b"): # drop the b
         value = value[:-1]
     if value.endswith("g"):

aiqa/tracing.py CHANGED Viewed

@@ -7,7 +7,7 @@ import inspect
 import fnmatch
 from typing import Any, Callable, Optional, List
 from functools import wraps
-from opentelemetry import trace
+from opentelemetry import context as otel_context, trace
 from opentelemetry.trace import Status, StatusCode
 from .client import get_aiqa_client, get_component_tag, get_aiqa_tracer
@@ -47,16 +47,16 @@ class TracingOptions:
             ignore_input: Iterable of keys (e.g., list, set) to exclude from
                 input data when recording span attributes. Applies after filter_input if both are set.
-                Only applies when
-                input is a dictionary. Supports simple wildcards (e.g., `"_*"`
-                matches `"_apple"`, `"_fruit"`). For example, use `["password", "api_key"]`
-                or `["_*", "password"]` to exclude sensitive fields from being traced.
+                Supports "self" and simple wildcards (e.g., `"_*"`
+                matches `"_apple"`, `"_fruit"`). The pattern `"_*"` is applied by default
+                to filter properties starting with '_' in nested objects.
             ignore_output: Iterable of keys (e.g., list, set) to exclude from
                 output data when recording span attributes. Only applies when
                 output is a dictionary. Supports simple wildcards (e.g., `"_*"`
-                matches `"_apple"`, `"_fruit"`). Useful for excluding large or sensitive
-                fields from traces.
+                matches `"_apple"`, `"_fruit"`). The pattern `"_*"` is applied by default
+                to filter properties starting with '_' in nested objects. Useful for excluding
+                large or sensitive fields from traces.
             filter_input: Callable function that receives the same arguments as the
                 decorated function (*args, **kwargs) and returns a filtered/transformed
@@ -96,7 +96,7 @@ class TracingOptions:
                     filter_input=lambda self, example: {
                         "dataset": self.dataset_id,
                         "experiment": self.experiment_id,
-                        "example_id": example.id if hasattr(example, 'id') else None
+                        "example": example.id if hasattr(example, 'id') else None
                     }
                 )
                 def run_example(self, example):
@@ -168,33 +168,89 @@ def _prepare_input(args: tuple, kwargs: dict, sig: Optional[inspect.Signature] =
     return result
-def _apply_ignore_patterns(data_dict: dict, ignore_patterns: Optional[List[str]]) -> dict:
+def _apply_ignore_patterns(
+    data_dict: dict,
+    ignore_patterns: Optional[List[str]],
+    recursive: bool = True,
+    max_depth: int = 100,
+    current_depth: int = 0
+) -> dict:
     """
-    Apply ignore patterns to a dict.
+    Apply ignore patterns to a dict, optionally recursively.
     Supports string keys, wildcard patterns (*), and list of patterns.
     Used for both ignore_input and ignore_output.
     Args:
-        data_dict: Dictionary to filter
+        data_dict: Dictionary to filter (may contain nested dictionaries)
         ignore_patterns: List of patterns to exclude (e.g., ["self", "_*", "password"])
+        recursive: Whether to apply patterns recursively to nested dictionaries
+        max_depth: Maximum recursion depth to prevent infinite loops (default: 100)
+        current_depth: Current recursion depth (internal use)
     Returns:
         Filtered dictionary with matching keys removed
     """
-    if not ignore_patterns or not isinstance(data_dict, dict):
+    if not isinstance(data_dict, dict):
         return data_dict
-    result = data_dict.copy()
-    keys_to_delete = [
-        key for key in result.keys()
-        if _matches_ignore_pattern(key, ignore_patterns)
-    ]
-    for key in keys_to_delete:
-        del result[key]
+    # Safety check: prevent infinite loops from extremely deep nesting
+    if current_depth >= max_depth:
+        logger.warning(
+            f"_apply_ignore_patterns: max depth {max_depth} reached, "
+            f"stopping recursion to prevent infinite loop"
+        )
+        return data_dict
+    # If no patterns, return copy (no filtering needed, even if recursive=True)
+    if not ignore_patterns:
+        return data_dict.copy()
+    result = {}
+    for key, value in data_dict.items():
+        # Skip keys that match ignore patterns
+        if _matches_ignore_pattern(key, ignore_patterns):
+            continue
+        # Recursively process nested dictionaries if recursive=True
+        if recursive and isinstance(value, dict):
+            result[key] = _apply_ignore_patterns(
+                value, ignore_patterns, recursive, max_depth, current_depth + 1
+            )
+        else:
+            result[key] = value
     return result
+def _merge_with_default_ignore_patterns(
+    ignore_patterns: Optional[List[str]],
+    client: Optional[Any] = None
+) -> List[str]:
+    """
+    Merge user-provided ignore patterns with client's default ignore patterns.
+    Args:
+        ignore_patterns: Optional list of user-provided patterns
+        client: Optional client instance (to avoid repeated get_aiqa_client() calls)
+    Returns:
+        List of patterns including client's default ignore patterns
+    """
+    if client is None:
+        client = get_aiqa_client()
+    default_patterns = client.default_ignore_patterns
+    if ignore_patterns is None:
+        return default_patterns.copy() if default_patterns else []
+    # Merge patterns, avoiding duplicates
+    merged = list(default_patterns)
+    for pattern in ignore_patterns:
+        if pattern not in merged:
+            merged.append(pattern)
+    return merged
 def _prepare_and_filter_input(
     args: tuple,
     kwargs: dict,
@@ -209,6 +265,7 @@ def _prepare_and_filter_input(
     1. Apply filter_input to args, kwargs (receives same inputs as decorated function, including self)
     2. Convert into dict ready for span.attributes.input
     3. Apply ignore_input to the dict (supports string, wildcard, and list patterns)
+       Client's default ignore patterns are automatically merged with ignore_input.
     Args:
         args: Positional arguments (including self for bound methods)
@@ -218,7 +275,7 @@ def _prepare_and_filter_input(
             including `self` for bound methods. This allows extracting properties from any object.
         ignore_input: Optional list of keys/patterns to exclude from the final dict.
             If "self" is in ignore_input, it will be removed from the final dict but filter_input
-            still receives it.
+            still receives it. Client's default ignore patterns are automatically merged.
         sig: Optional function signature for proper arg name resolution
     Returns:
@@ -251,15 +308,23 @@ def _prepare_and_filter_input(
         input_data = _prepare_input(args, kwargs, sig)
     # Step 3: Apply ignore_input to the dict (removes "self" from final dict if specified)
-    should_ignore_self = ignore_input and "self" in ignore_input
+    # Merge with client's default ignore patterns
+    client = get_aiqa_client()
+    merged_ignore_input = _merge_with_default_ignore_patterns(ignore_input, client)
+    should_ignore_self = "self" in merged_ignore_input
     if isinstance(input_data, dict):
-        input_data = _apply_ignore_patterns(input_data, ignore_input)
+        input_data = _apply_ignore_patterns(
+            input_data,
+            merged_ignore_input,
+            recursive=client.ignore_recursive
+        )
         # Handle case where we removed self and there are no remaining args/kwargs
         if should_ignore_self and not input_data:
             return None
-    elif ignore_input:
-        # Warn if ignore_input is set but input_data is not a dict
-        logger.warning(f"_prepare_and_filter_input: skip: ignore_input is set but input_data is not a dict: {type(input_data)}")
+    elif merged_ignore_input:
+        # Warn if ignore patterns are set but input_data is not a dict
+        logger.warning(f"_prepare_and_filter_input: skip: ignore patterns are set but input_data is not a dict: {type(input_data)}")
     return input_data
@@ -269,7 +334,10 @@ def _filter_and_serialize_output(
     filter_output: Optional[Callable[[Any], Any]],
     ignore_output: Optional[List[str]],
 ) -> Any:
-    """Filter and serialize output for span attributes."""
+    """
+    Filter and serialize output for span attributes.
+    Client's default ignore patterns are automatically merged with ignore_output.
+    """
     output_data = result
     if filter_output:
         if isinstance(output_data, dict):
@@ -277,11 +345,19 @@ def _filter_and_serialize_output(
         output_data = filter_output(output_data)
     # Apply ignore_output patterns (supports key, wildcard, and list patterns)
+    # Merge with client's default ignore patterns
+    client = get_aiqa_client()
+    merged_ignore_output = _merge_with_default_ignore_patterns(ignore_output, client)
     if isinstance(output_data, dict):
-        output_data = _apply_ignore_patterns(output_data, ignore_output)
-    elif ignore_output:
-        # Warn if ignore_output is set but output_data is not a dict
-        logger.warning(f"_filter_and_serialize_output: skip: ignore_output is set but output_data is not a dict: {type(output_data)}")
+        output_data = _apply_ignore_patterns(
+            output_data,
+            merged_ignore_output,
+            recursive=client.ignore_recursive
+        )
+    elif merged_ignore_output:
+        # Warn if ignore patterns are set but output_data is not a dict
+        logger.warning(f"_filter_and_serialize_output: skip: ignore patterns are set but output_data is not a dict: {type(output_data)}")
     # Serialize immediately to create immutable result (removes mutable structures)
     return serialize_for_span(output_data)
@@ -487,6 +563,7 @@ def WithTracing(
     ignore_output: Optional[List[str]] = None,
     filter_input: Optional[Callable[[Any], Any]] = None,
     filter_output: Optional[Callable[[Any], Any]] = None,
+    root: bool = False,
 ):
     """
     Decorator to automatically create spans for function calls.
@@ -500,12 +577,14 @@ def WithTracing(
         ignore_input: List of keys to exclude from input data when recording span attributes.
             self is handled as "self"
             Supports simple wildcards (e.g., "_*"
-            matches "_apple", "_fruit"). For example, use ["password", "api_key"] or
-            ["_*", "password"] to exclude sensitive fields from being traced.
+            matches "_apple", "_fruit"). The pattern "_*" is applied by default
+            to filter properties starting with '_' in nested objects. For example, use
+            ["password", "api_key"] to exclude additional sensitive fields from being traced.
         ignore_output: List of keys to exclude from output data when recording span attributes.
             Only applies when output is a dictionary. Supports simple wildcards (e.g., "_*"
-            matches "_apple", "_fruit"). Useful for excluding large or sensitive
-            fields from traces.
+            matches "_apple", "_fruit"). The pattern "_*" is applied by default
+            to filter properties starting with '_' in nested objects. Useful for excluding
+            large or sensitive fields from traces.
         filter_input: Function to filter/transform input before recording.
             Receives the same arguments as the decorated function (*args, **kwargs),
             including `self` for bound methods. This allows you to extract specific
@@ -514,6 +593,7 @@ def WithTracing(
             Returns a dict or any value (will be converted to dict). Applied before ignore_input.
         filter_output: Function to filter/transform output before recording.
             Receives the output value and returns a filtered/transformed version.
+        root: Whether this is a root span. If True, the span will not be linked to any parent spans.
     Example:
         @WithTracing
@@ -607,7 +687,8 @@ def WithTracing(
                 return executor()
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
-            with tracer.start_as_current_span(fn_name) as span:
+            span_kw = {"context": otel_context.Context()} if root else {}
+            with tracer.start_as_current_span(fn_name, **span_kw) as span:
                 if not _setup_span(span, input_data):
                     return executor() # span is not recording, so just execute the function and return the result
                 try:
@@ -628,7 +709,8 @@ def WithTracing(
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
-            with tracer.start_as_current_span(fn_name) as span:
+            span_kw = {"context": otel_context.Context()} if root else {}
+            with tracer.start_as_current_span(fn_name, **span_kw) as span:
                 if not _setup_span(span, input_data):
                     return await executor()
@@ -654,7 +736,8 @@ def WithTracing(
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
             # Create span but don't use 'with' - span will be closed by TracedGenerator
-            span = tracer.start_span(fn_name)
+            span_kw = {"context": otel_context.Context()} if root else {}
+            span = tracer.start_span(fn_name, **span_kw)
             token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
             try:
@@ -678,12 +761,14 @@ def WithTracing(
             # This is called lazily when the function runs, not at decorator definition time
             client = get_aiqa_client()
             if not client.enabled:
-                return await executor()
+                # executor() returns an async generator object, not a coroutine, so don't await it
+                return executor()
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
             # Create span but don't use 'with' - span will be closed by TracedAsyncGenerator
-            span = tracer.start_span(fn_name)
+            span_kw = {"context": otel_context.Context()} if root else {}
+            span = tracer.start_span(fn_name, **span_kw)
             token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
             try:

aiqa/types.py CHANGED Viewed

@@ -29,7 +29,7 @@ class MetricResult(TypedDict):
 class Result(TypedDict):
   """Result of evaluating a set of metrics on an output (i.e. the full set of metrics for a single example)."""
-  exampleId: str
+  example: str
   scores: Dict[str, Number]
   messages: Optional[Dict[str, str]] = None
   errors: Optional[Dict[str, str]] = None

{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiqa-client
-Version: 0.6.1
+Version: 0.7.2
 Summary: OpenTelemetry-based Python client for tracing functions and sending traces to the AIQA server
 Author-email: AIQA <info@aiqa.dev>
 License: MIT

aiqa_client-0.7.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
+aiqa/client.py,sha256=zS9OQQhdvVeIoBz0o8qrz-rjXngEbS9Lrli2ZWNIsrM,15993
+aiqa/constants.py,sha256=if54R1OD111iPvB53mw0U9NRrBV-zvvm1gOAVxRj-vE,226
+aiqa/experiment_runner.py,sha256=YpUOoBS_3DvT_ipofWe8MnrSjmWz4Bmfe8yaErdprBA,17730
+aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
+aiqa/llm_as_judge.py,sha256=ESmqQfaYpypCNfsODkdn5s85n_nzJ4WKbhUMVTb2djE,10087
+aiqa/object_serialiser.py,sha256=mzd2U_mFcAPalN2m9wxq35-BBeRJOhNK1k0-BmRSfQM,17055
+aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
+aiqa/tracing.py,sha256=XKYUwZUIkybxIkOKMj5xwVV2IwX6QTvRIzsZOv8jUOc,35771
+aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
+aiqa/types.py,sha256=Rv27oC1R0P1soJz5wsdwkVW-jfHQEVi4vUhwRJid270,2529
+aiqa_client-0.7.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
+aiqa_client-0.7.2.dist-info/METADATA,sha256=Q4Wwu_FqNSB7IRdydBcRFDcL2bHNLyStT6DYkc_aS8E,7705
+aiqa_client-0.7.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+aiqa_client-0.7.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
+aiqa_client-0.7.2.dist-info/RECORD,,

{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

aiqa_client-0.6.1.dist-info/RECORD DELETED Viewed

@@ -1,17 +0,0 @@
-aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
-aiqa/client.py,sha256=pChJBbaPwd6flVaucEt1oZDoSSlLQWJpBLcqruLuM60,13296
-aiqa/constants.py,sha256=Xq8425ozX9T4d_gmSYvWjEqPIdNlrasuymmImetC3rs,226
-aiqa/experiment_runner.py,sha256=FVhAtvjV5_jAmPCq55Xl2TNwXV5YAIrv2OFaV3wbjbs,19426
-aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
-aiqa/llm_as_judge.py,sha256=tJlYX6qZaqhZEC-3wvSk7btb4SMk1O1avDtujj9lHj4,9980
-aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
-aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
-aiqa/tracing.py,sha256=juRFgt-uR5Z726F3pOZcxgcnZWom-sZ_-fYrhPHbFP4,32159
-aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
-aiqa/types.py,sha256=E1-IPJNbH9A4TPUT0bXZDIT6SHwHQSolzOM4j9NXR5E,2531
-aiqa_client-0.6.1.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
-aiqa_client-0.6.1.dist-info/METADATA,sha256=n76Zntwmd9tIP3aXQKYuGaUJQkJr37GmF-Vn_EmToCg,7705
-aiqa_client-0.6.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-aiqa_client-0.6.1.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
-aiqa_client-0.6.1.dist-info/RECORD,,

{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

{aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

aiqa-client 0.6.1__py3-none-any.whl → 0.7.2__py3-none-any.whl

aiqa-client 0.6.1__py3-none-any.whl → 0.7.2__py3-none-any.whl