aiqa-client 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/constants.py +1 -1
- aiqa/experiment_runner.py +46 -52
- aiqa/tracing.py +11 -5
- {aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/METADATA +1 -1
- {aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/RECORD +8 -8
- {aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/WHEEL +0 -0
- {aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/licenses/LICENSE.txt +0 -0
- {aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/top_level.txt +0 -0
aiqa/constants.py
CHANGED
@@ -3,6 +3,6 @@ Constants used across the AIQA client package.
 """
 
 AIQA_TRACER_NAME = "aiqa-tracer"
-VERSION = "0.7.0" # automatically updated by set-version-json.sh
+VERSION = "0.7.2" # automatically updated by set-version-json.sh
 
 LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
aiqa/experiment_runner.py
CHANGED
@@ -5,11 +5,16 @@ ExperimentRunner - runs experiments on datasets and scores results
 import os
 import time
 import asyncio
+from opentelemetry import context as otel_context
+from opentelemetry.trace import Status, StatusCode, set_span_in_context
 from .constants import LOG_TAG
 from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
 from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
 from .tracing import WithTracing
-from .span_helpers import set_span_attribute, flush_tracing
+from .span_helpers import set_span_attribute, flush_tracing, get_active_trace_id
+from .client import get_aiqa_client, get_aiqa_tracer, get_component_tag
+from .object_serialiser import serialize_for_span
+from .tracing_llm_utils import _extract_and_set_token_usage, _extract_and_set_provider_and_model
 from .llm_as_judge import score_llm_metric_local, get_model_from_server, call_llm_fallback
 import requests
 from .types import MetricResult, ScoreThisInputOutputMetricType, Example, Result, Metric, CallLLMType
@@ -25,31 +30,9 @@ CallMyCodeType = Callable[[Any, Dict[str, Any]], Union[Any, Awaitable[Any]]]
 ScoreThisOutputType = Callable[[Any, Any, Dict[str, Any], Dict[str, Any]], Awaitable[Dict[str, Any]]]
 
 
-
-def _filter_input_for_run(input_data: Any) -> Dict[str, Any]:
-    """Filter input for run method to extract dataset and experiment IDs."""
-    if not isinstance(input_data, dict):
-        return {}
-    self_obj = input_data.get("self")
-    if not self_obj:
-        return {}
-    return {
-        "dataset": getattr(self_obj, "dataset_id", None),
-        "experiment": getattr(self_obj, "experiment_id", None),
-    }
-
-
-def _filter_input_for_run_example(
-    self: "ExperimentRunner",
-    example: Dict[str, Any],
-    call_my_code: Any = None,
-    score_this_output: Any = None,
-) -> Dict[str, Any]:
-    """Filter input for run_example method to extract dataset, experiment, and example IDs."""
-    result = _filter_input_for_run({"self": self})
-    if isinstance(example, dict):
-        result["example"] = example.get("id")
-    return result
+def _metric_score_key(metric: Dict[str, Any]) -> str:
+    """Key for scores in API: server expects metric name (fallback to id)."""
+    return (metric.get("name") or metric.get("id")) or ""
 
 
 class ExperimentRunner:
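For illustration, how the new helper picks the score key (the metric dicts below are hypothetical):

_metric_score_key({"id": "m1", "name": "accuracy"})  # -> "accuracy" (name preferred)
_metric_score_key({"id": "m1"})                      # -> "m1" (falls back to id)
_metric_score_key({})                                # -> "" (caller skips such metrics)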
@@ -144,7 +127,7 @@ class ExperimentRunner:
         List of example objects
         """
         params = {
-            "…
+            "dataset": self.dataset_id,
             "limit": str(limit),
         }
         if self.organisation:
@@ -216,6 +199,7 @@ class ExperimentRunner:
         example: Example,
         output: Any,
         result: Result,
+        trace_id: Optional[str] = None,
     ) -> Result:
         """
         Ask the server to score an example result. Stores the score for later summary calculation.
@@ -235,21 +219,20 @@ class ExperimentRunner:
         if not example_id:
             raise ValueError("Example must have an 'id' field")
         if result is None:
-            result = …
+            result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
         scores = result.get("scores") or {}
-
-
 
         print(f"Scoring and storing example: {example_id}")
         print(f"Scores: {scores}")
 
         # Run synchronous requests.post in a thread pool to avoid blocking
+        # Server expects output = raw output to score, not the result dict; scores keyed by metric name
         def _do_request():
             return requests.post(
                 f"{self.server_url}/experiment/{self.experiment_id}/example/{example_id}/scoreAndStore",
                 json={
-                    "output": result,
-                    "…
+                    "output": output,
+                    "trace": trace_id,
                     "scores": scores,
                 },
                 headers=self._get_headers(),
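A sketch of the JSON body now sent to scoreAndStore (all values hypothetical, standing in for run_example state):

output = {"answer": "42"}                        # raw engine output, no longer the result dict
trace_id = "0af7651916cd43dd8448eb211c80319c"    # root trace id captured in run_example, may be None
scores = {"accuracy": 0.9, "duration": 1234}     # keyed by metric name

payload = {"output": output, "trace": trace_id, "scores": scores}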
@@ -264,7 +247,6 @@ class ExperimentRunner:
         print(f"scoreAndStore response: {json_result}")
         return json_result
 
-    @WithTracing(filter_input=_filter_input_for_run)
     async def run(
         self,
         call_my_code: CallMyCodeType,
@@ -279,17 +261,9 @@ class ExperimentRunner:
         """
         examples = self.get_examples_for_dataset()
 
-        # Wrap engine to match run_example signature (input, parameters)
-        async def wrapped_engine(input_data, parameters):
-            result = call_my_code(input_data, parameters)
-            # Handle async functions
-            if hasattr(result, "__await__"):
-                result = await result
-            return result
-
         for example in examples:
             try:
-                scores = await self.run_example(example, wrapped_engine, scorer_for_metric_id)
+                scores = await self.run_example(example, call_my_code, scorer_for_metric_id)
                 if scores:
                     self.scores.append(
                         {
@@ -302,7 +276,6 @@ class ExperimentRunner:
                 print(f"Error processing example {example.get('id', 'unknown')}: {e}")
                 # Continue with next example instead of failing entire run
 
-    @WithTracing(filter_input=_filter_input_for_run_example)
     async def run_example(
         self,
         example: Example,
@@ -312,6 +285,9 @@ class ExperimentRunner:
         """
         Run the engine on an example with the experiment's parameters, score the result, and store it.
 
+        Spans: one root "RunExample" span (input, call_my_code, output) and one child "ScoreExample"
+        span for scoring, so the server sees a clear call_my_code vs scoring split (aligned with client-go).
+
         Args:
             example: The example to run. See Example.ts type
             call_my_code: Function that takes input and parameters, returns output (can be async)
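The docstring's span layout, sketched as a tree (span names from the docstring; the attribute placement is an assumption based on the description):

RunExample (root span: input, call_my_code, output)
└── ScoreExample (child span: scoring)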
@@ -335,7 +311,6 @@ class ExperimentRunner:
         example_id = example.get("id")
         if not example_id:
             raise ValueError("Example must have an 'id' field")
-        set_span_attribute("example", example_id)
 
         print(f"Running with parameters: {parameters_here}")
         original_env_vars: Dict[str, Optional[str]] = {}
@@ -345,7 +320,25 @@ class ExperimentRunner:
                 os.environ[key] = str(value)
         try:
             start = time.time() * 1000
-            output = call_my_code(input_data, parameters_here)
+
+            run_trace_id_ref: List[Optional[str]] = [None]
+
+            # Wrap engine to match run_example signature (input, parameters)
+            # Root span so server can find it by parent:unset; trace ID is sent to scoreAndStore
+            def set_trace_id(tid: Optional[str]) -> None:
+                run_trace_id_ref[0] = tid
+
+            @WithTracing(root=True)
+            async def wrapped_engine(input_data, parameters, set_trace_id: Callable[[Optional[str]], None]):
+                trace_id_here = get_active_trace_id()
+                set_trace_id(trace_id_here)
+                result = call_my_code(input_data, parameters)
+                # Handle async functions
+                if hasattr(result, "__await__"):
+                    result = await result
+                return result
+
+            output = wrapped_engine(input_data, parameters_here, set_trace_id)
             if hasattr(output, "__await__"):
                 output = await output
             duration = int((time.time() * 1000) - start)
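The one-element list acts as a mutable cell, letting the decorated inner coroutine hand the trace id back out to the enclosing scope without nonlocal; a self-contained sketch of the pattern (names and the id value are hypothetical):

from typing import List, Optional

def capture_example() -> Optional[str]:
    trace_id_ref: List[Optional[str]] = [None]

    def inner() -> None:
        # In the diff this value comes from get_active_trace_id()
        trace_id_ref[0] = "0123abcd"  # hypothetical trace id

    inner()
    return trace_id_ref[0]

print(capture_example())  # -> "0123abcd"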
@@ -354,10 +347,11 @@ class ExperimentRunner:
             dataset_metrics = self.get_dataset().get("metrics", [])
             specific_metrics = example.get("metrics", [])
             metrics = [*dataset_metrics, *specific_metrics]
-            result = …
+            result: Result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
             for metric in metrics:
                 metric_id = metric.get("id")
-                if not metric_id:
+                score_key = _metric_score_key(metric)
+                if not metric_id or not score_key:
                     continue
                 scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
                 if scorer:
@@ -367,15 +361,15 @@ class ExperimentRunner:
                 else:
                     continue
                 if not metric_result:
-                    result["errors"][metric_id] = "Scoring function returned None"
+                    result["errors"][score_key] = "Scoring function returned None"
                     continue
-                result["scores"][metric_id] = metric_result.get("score")
-                result["messages"][metric_id] = metric_result.get("message")
-                result["errors"][metric_id] = metric_result.get("error")
+                result["scores"][score_key] = metric_result.get("score")
+                result["messages"][score_key] = metric_result.get("message")
+                result["errors"][score_key] = metric_result.get("error")
             result["scores"]["duration"] = duration
             await flush_tracing()
             print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
-            result = await self.score_and_store(example, output, result)
+            result = await self.score_and_store(example, output, result, trace_id=run_trace_id_ref[0])
             print(f"scoreAndStore returned: {result}")
             return [result]
         finally:
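For context, a hypothetical driver for the updated flow; the constructor arguments are assumptions, since only run() and run_example() appear in this diff:

async def call_my_code(input_data, parameters):
    # Stand-in engine; a real one would call a model with the parameters
    return {"answer": input_data}

async def main():
    runner = ExperimentRunner(...)  # dataset/experiment wiring not shown in this diff
    # run() now hands call_my_code straight to run_example(), which wraps it in a
    # root-span engine and forwards the captured trace id to scoreAndStore.
    await runner.run(call_my_code, scorer_for_metric_id=None)

# asyncio.run(main())  # left commented: the placeholder constructor above is not runnable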
aiqa/tracing.py
CHANGED
@@ -7,7 +7,7 @@ import inspect
 import fnmatch
 from typing import Any, Callable, Optional, List
 from functools import wraps
-from opentelemetry import trace
+from opentelemetry import context as otel_context, trace
 from opentelemetry.trace import Status, StatusCode
 
 from .client import get_aiqa_client, get_component_tag, get_aiqa_tracer
@@ -563,6 +563,7 @@ def WithTracing(
     ignore_output: Optional[List[str]] = None,
     filter_input: Optional[Callable[[Any], Any]] = None,
     filter_output: Optional[Callable[[Any], Any]] = None,
+    root: bool = False,
 ):
     """
     Decorator to automatically create spans for function calls.
@@ -592,6 +593,7 @@ def WithTracing(
             Returns a dict or any value (will be converted to dict). Applied before ignore_input.
         filter_output: Function to filter/transform output before recording.
             Receives the output value and returns a filtered/transformed version.
+        root: Whether this is a root span. If True, the span will not be linked to any parent spans.
 
     Example:
         @WithTracing
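A usage sketch for the new flag (the decorated function is hypothetical):

from aiqa.tracing import WithTracing

@WithTracing(root=True)
async def handle_job(payload):
    # Starts its own trace even when called inside another traced span,
    # so the server can find it with a parent:unset query.
    return {"ok": True}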
@@ -685,7 +687,8 @@ def WithTracing(
                 return executor()
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
-            with tracer.start_as_current_span(fn_name) as span:
+            span_kw = {"context": otel_context.Context()} if root else {}
+            with tracer.start_as_current_span(fn_name, **span_kw) as span:
                 if not _setup_span(span, input_data):
                     return executor()  # span is not recording, so just execute the function and return the result
                 try:
@@ -706,7 +709,8 @@ def WithTracing(
 
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
-            with tracer.start_as_current_span(fn_name) as span:
+            span_kw = {"context": otel_context.Context()} if root else {}
+            with tracer.start_as_current_span(fn_name, **span_kw) as span:
                 if not _setup_span(span, input_data):
                     return await executor()
 
@@ -732,7 +736,8 @@ def WithTracing(
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
             # Create span but don't use 'with' - span will be closed by TracedGenerator
-            span = tracer.start_span(fn_name)
+            span_kw = {"context": otel_context.Context()} if root else {}
+            span = tracer.start_span(fn_name, **span_kw)
             token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
 
             try:
@@ -762,7 +767,8 @@ def WithTracing(
             # Get tracer after initialization (lazy)
             tracer = get_aiqa_tracer()
             # Create span but don't use 'with' - span will be closed by TracedAsyncGenerator
-            span = tracer.start_span(fn_name)
+            span_kw = {"context": otel_context.Context()} if root else {}
+            span = tracer.start_span(fn_name, **span_kw)
             token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
 
             try:
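Why the empty context yields a root span: opentelemetry-python resolves a new span's parent from the supplied context, so a fresh otel_context.Context() carries no active span to inherit from. A minimal standalone sketch (the tracer name is arbitrary, standing in for get_aiqa_tracer()):

from opentelemetry import context as otel_context, trace

tracer = trace.get_tracer("demo")

with tracer.start_as_current_span("parent"):
    # Default behavior: a span opened here becomes a child of "parent".
    with tracer.start_as_current_span("detached", context=otel_context.Context()):
        pass  # empty context -> no active parent, so "detached" starts a new trace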
{aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/RECORD
CHANGED
@@ -1,17 +1,17 @@
 aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
 aiqa/client.py,sha256=zS9OQQhdvVeIoBz0o8qrz-rjXngEbS9Lrli2ZWNIsrM,15993
-aiqa/constants.py,sha256=…
-aiqa/experiment_runner.py,sha256=…
+aiqa/constants.py,sha256=if54R1OD111iPvB53mw0U9NRrBV-zvvm1gOAVxRj-vE,226
+aiqa/experiment_runner.py,sha256=YpUOoBS_3DvT_ipofWe8MnrSjmWz4Bmfe8yaErdprBA,17730
 aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
 aiqa/llm_as_judge.py,sha256=ESmqQfaYpypCNfsODkdn5s85n_nzJ4WKbhUMVTb2djE,10087
 aiqa/object_serialiser.py,sha256=mzd2U_mFcAPalN2m9wxq35-BBeRJOhNK1k0-BmRSfQM,17055
 aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
-aiqa/tracing.py,sha256=…
+aiqa/tracing.py,sha256=XKYUwZUIkybxIkOKMj5xwVV2IwX6QTvRIzsZOv8jUOc,35771
 aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
 aiqa/types.py,sha256=Rv27oC1R0P1soJz5wsdwkVW-jfHQEVi4vUhwRJid270,2529
-aiqa_client-0.7.0.dist-info/licenses/LICENSE.txt,sha256=…
-aiqa_client-0.7.0.dist-info/METADATA,sha256=…
-aiqa_client-0.7.0.dist-info/WHEEL,sha256=…
-aiqa_client-0.7.0.dist-info/top_level.txt,sha256=…
-aiqa_client-0.7.0.dist-info/RECORD,,
+aiqa_client-0.7.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
+aiqa_client-0.7.2.dist-info/METADATA,sha256=Q4Wwu_FqNSB7IRdydBcRFDcL2bHNLyStT6DYkc_aS8E,7705
+aiqa_client-0.7.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+aiqa_client-0.7.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
+aiqa_client-0.7.2.dist-info/RECORD,,
{aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/WHEEL
File without changes
{aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/licenses/LICENSE.txt
File without changes
{aiqa_client-0.7.0.dist-info → aiqa_client-0.7.2.dist-info}/top_level.txt
File without changes