aiqa-client 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiqa/constants.py CHANGED
@@ -3,6 +3,6 @@ Constants used across the AIQA client package.
3
3
  """
4
4
 
5
5
  AIQA_TRACER_NAME = "aiqa-tracer"
6
- VERSION = "0.7.0" # automatically updated by set-version-json.sh
6
+ VERSION = "0.7.2" # automatically updated by set-version-json.sh
7
7
 
8
8
  LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
aiqa/experiment_runner.py CHANGED
@@ -5,11 +5,16 @@ ExperimentRunner - runs experiments on datasets and scores results
5
5
  import os
6
6
  import time
7
7
  import asyncio
8
+ from opentelemetry import context as otel_context
9
+ from opentelemetry.trace import Status, StatusCode, set_span_in_context
8
10
  from .constants import LOG_TAG
9
11
  from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
10
12
  from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
11
13
  from .tracing import WithTracing
12
- from .span_helpers import set_span_attribute, flush_tracing
14
+ from .span_helpers import set_span_attribute, flush_tracing, get_active_trace_id
15
+ from .client import get_aiqa_client, get_aiqa_tracer, get_component_tag
16
+ from .object_serialiser import serialize_for_span
17
+ from .tracing_llm_utils import _extract_and_set_token_usage, _extract_and_set_provider_and_model
13
18
  from .llm_as_judge import score_llm_metric_local, get_model_from_server, call_llm_fallback
14
19
  import requests
15
20
  from .types import MetricResult, ScoreThisInputOutputMetricType, Example, Result, Metric, CallLLMType
@@ -25,31 +30,9 @@ CallMyCodeType = Callable[[Any, Dict[str, Any]], Union[Any, Awaitable[Any]]]
25
30
  ScoreThisOutputType = Callable[[Any, Any, Dict[str, Any], Dict[str, Any]], Awaitable[Dict[str, Any]]]
26
31
 
27
32
 
28
-
29
- def _filter_input_for_run(input_data: Any) -> Dict[str, Any]:
30
- """Tracing:Filter input - drop most, keep just ids"""
31
- if not isinstance(input_data, dict):
32
- return {}
33
- self_obj = input_data.get("self")
34
- if not self_obj:
35
- return {}
36
- return {
37
- "dataset": getattr(self_obj, "dataset_id", None),
38
- "experiment": getattr(self_obj, "experiment_id", None),
39
- }
40
-
41
-
42
- def _filter_input_for_run_example(
43
- self: "ExperimentRunner",
44
- example: Dict[str, Any],
45
- call_my_code: Any = None,
46
- score_this_output: Any = None,
47
- ) -> Dict[str, Any]:
48
- """Filter input for run_example method to extract dataset, experiment, and example IDs."""
49
- result = _filter_input_for_run({"self": self})
50
- if isinstance(example, dict):
51
- result["example"] = example.get("id")
52
- return result
33
+ def _metric_score_key(metric: Dict[str, Any]) -> str:
34
+ """Key for scores in API: server expects metric name (fallback to id)."""
35
+ return (metric.get("name") or metric.get("id")) or ""
53
36
 
54
37
 
55
38
  class ExperimentRunner:
@@ -144,7 +127,7 @@ class ExperimentRunner:
144
127
  List of example objects
145
128
  """
146
129
  params = {
147
- "dataset_id": self.dataset_id,
130
+ "dataset": self.dataset_id,
148
131
  "limit": str(limit),
149
132
  }
150
133
  if self.organisation:
@@ -216,6 +199,7 @@ class ExperimentRunner:
216
199
  example: Example,
217
200
  output: Any,
218
201
  result: Result,
202
+ trace_id: Optional[str] = None,
219
203
  ) -> Result:
220
204
  """
221
205
  Ask the server to score an example result. Stores the score for later summary calculation.
@@ -235,21 +219,20 @@ class ExperimentRunner:
235
219
  if not example_id:
236
220
  raise ValueError("Example must have an 'id' field")
237
221
  if result is None:
238
- result = Result(example=example_id, scores={}, messages={}, errors={})
222
+ result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
239
223
  scores = result.get("scores") or {}
240
-
241
-
242
224
 
243
225
  print(f"Scoring and storing example: {example_id}")
244
226
  print(f"Scores: {scores}")
245
227
 
246
228
  # Run synchronous requests.post in a thread pool to avoid blocking
229
+ # Server expects output = raw output to score, not the result dict; scores keyed by metric name
247
230
  def _do_request():
248
231
  return requests.post(
249
232
  f"{self.server_url}/experiment/{self.experiment_id}/example/{example_id}/scoreAndStore",
250
233
  json={
251
- "output": result,
252
- "traceId": example.get("trace"), # Server returns 'trace' (lowercase), but API expects 'traceId' (camelCase)
234
+ "output": output,
235
+ "trace": trace_id,
253
236
  "scores": scores,
254
237
  },
255
238
  headers=self._get_headers(),
@@ -264,7 +247,6 @@ class ExperimentRunner:
264
247
  print(f"scoreAndStore response: {json_result}")
265
248
  return json_result
266
249
 
267
- @WithTracing(filter_input=_filter_input_for_run)
268
250
  async def run(
269
251
  self,
270
252
  call_my_code: CallMyCodeType,
@@ -279,17 +261,9 @@ class ExperimentRunner:
279
261
  """
280
262
  examples = self.get_examples_for_dataset()
281
263
 
282
- # Wrap engine to match run_example signature (input, parameters)
283
- async def wrapped_engine(input_data, parameters):
284
- result = call_my_code(input_data, parameters)
285
- # Handle async functions
286
- if hasattr(result, "__await__"):
287
- result = await result
288
- return result
289
-
290
264
  for example in examples:
291
265
  try:
292
- scores = await self.run_example(example, wrapped_engine, scorer_for_metric_id)
266
+ scores = await self.run_example(example, call_my_code, scorer_for_metric_id)
293
267
  if scores:
294
268
  self.scores.append(
295
269
  {
@@ -302,7 +276,6 @@ class ExperimentRunner:
302
276
  print(f"Error processing example {example.get('id', 'unknown')}: {e}")
303
277
  # Continue with next example instead of failing entire run
304
278
 
305
- @WithTracing(filter_input=_filter_input_for_run_example)
306
279
  async def run_example(
307
280
  self,
308
281
  example: Example,
@@ -312,6 +285,9 @@ class ExperimentRunner:
312
285
  """
313
286
  Run the engine on an example with the experiment's parameters, score the result, and store it.
314
287
 
288
+ Spans: one root "RunExample" span (input, call_my_code, output) and one child "ScoreExample"
289
+ span for scoring, so the server sees a clear call_my_code vs scoring split (aligned with client-go).
290
+
315
291
  Args:
316
292
  example: The example to run. See Example.ts type
317
293
  call_my_code: Function that takes input and parameters, returns output (can be async)
@@ -335,7 +311,6 @@ class ExperimentRunner:
335
311
  example_id = example.get("id")
336
312
  if not example_id:
337
313
  raise ValueError("Example must have an 'id' field")
338
- set_span_attribute("example", example_id)
339
314
 
340
315
  print(f"Running with parameters: {parameters_here}")
341
316
  original_env_vars: Dict[str, Optional[str]] = {}
@@ -345,7 +320,25 @@ class ExperimentRunner:
345
320
  os.environ[key] = str(value)
346
321
  try:
347
322
  start = time.time() * 1000
348
- output = call_my_code(input_data, parameters_here)
323
+
324
+ run_trace_id_ref: List[Optional[str]] = [None]
325
+
326
+ # Wrap engine to match run_example signature (input, parameters)
327
+ # Root span so server can find it by parent:unset; trace ID is sent to scoreAndStore
328
+ def set_trace_id(tid: Optional[str]) -> None:
329
+ run_trace_id_ref[0] = tid
330
+
331
+ @WithTracing(root=True)
332
+ async def wrapped_engine(input_data, parameters, set_trace_id: Callable[[Optional[str]], None]):
333
+ trace_id_here = get_active_trace_id()
334
+ set_trace_id(trace_id_here)
335
+ result = call_my_code(input_data, parameters)
336
+ # Handle async functions
337
+ if hasattr(result, "__await__"):
338
+ result = await result
339
+ return result
340
+
341
+ output = wrapped_engine(input_data, parameters_here, set_trace_id)
349
342
  if hasattr(output, "__await__"):
350
343
  output = await output
351
344
  duration = int((time.time() * 1000) - start)
@@ -354,10 +347,11 @@ class ExperimentRunner:
354
347
  dataset_metrics = self.get_dataset().get("metrics", [])
355
348
  specific_metrics = example.get("metrics", [])
356
349
  metrics = [*dataset_metrics, *specific_metrics]
357
- result = Result(example=example_id, scores={}, messages={}, errors={})
350
+ result: Result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
358
351
  for metric in metrics:
359
352
  metric_id = metric.get("id")
360
- if not metric_id:
353
+ score_key = _metric_score_key(metric)
354
+ if not metric_id or not score_key:
361
355
  continue
362
356
  scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
363
357
  if scorer:
@@ -367,15 +361,15 @@ class ExperimentRunner:
367
361
  else:
368
362
  continue
369
363
  if not metric_result:
370
- result["errors"][metric_id] = "Scoring function returned None"
364
+ result["errors"][score_key] = "Scoring function returned None"
371
365
  continue
372
- result["scores"][metric_id] = metric_result.get("score")
373
- result["messages"][metric_id] = metric_result.get("message")
374
- result["errors"][metric_id] = metric_result.get("error")
366
+ result["scores"][score_key] = metric_result.get("score")
367
+ result["messages"][score_key] = metric_result.get("message")
368
+ result["errors"][score_key] = metric_result.get("error")
375
369
  result["scores"]["duration"] = duration
376
370
  await flush_tracing()
377
371
  print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
378
- result = await self.score_and_store(example, output, result)
372
+ result = await self.score_and_store(example, output, result, trace_id=run_trace_id_ref[0])
379
373
  print(f"scoreAndStore returned: {result}")
380
374
  return [result]
381
375
  finally:
aiqa/tracing.py CHANGED
@@ -7,7 +7,7 @@ import inspect
7
7
  import fnmatch
8
8
  from typing import Any, Callable, Optional, List
9
9
  from functools import wraps
10
- from opentelemetry import trace
10
+ from opentelemetry import context as otel_context, trace
11
11
  from opentelemetry.trace import Status, StatusCode
12
12
 
13
13
  from .client import get_aiqa_client, get_component_tag, get_aiqa_tracer
@@ -563,6 +563,7 @@ def WithTracing(
563
563
  ignore_output: Optional[List[str]] = None,
564
564
  filter_input: Optional[Callable[[Any], Any]] = None,
565
565
  filter_output: Optional[Callable[[Any], Any]] = None,
566
+ root: bool = False,
566
567
  ):
567
568
  """
568
569
  Decorator to automatically create spans for function calls.
@@ -592,6 +593,7 @@ def WithTracing(
592
593
  Returns a dict or any value (will be converted to dict). Applied before ignore_input.
593
594
  filter_output: Function to filter/transform output before recording.
594
595
  Receives the output value and returns a filtered/transformed version.
596
+ root: Whether this is a root span. If True, the span will not be linked to any parent spans.
595
597
 
596
598
  Example:
597
599
  @WithTracing
@@ -685,7 +687,8 @@ def WithTracing(
685
687
  return executor()
686
688
  # Get tracer after initialization (lazy)
687
689
  tracer = get_aiqa_tracer()
688
- with tracer.start_as_current_span(fn_name) as span:
690
+ span_kw = {"context": otel_context.Context()} if root else {}
691
+ with tracer.start_as_current_span(fn_name, **span_kw) as span:
689
692
  if not _setup_span(span, input_data):
690
693
  return executor() # span is not recording, so just execute the function and return the result
691
694
  try:
@@ -706,7 +709,8 @@ def WithTracing(
706
709
 
707
710
  # Get tracer after initialization (lazy)
708
711
  tracer = get_aiqa_tracer()
709
- with tracer.start_as_current_span(fn_name) as span:
712
+ span_kw = {"context": otel_context.Context()} if root else {}
713
+ with tracer.start_as_current_span(fn_name, **span_kw) as span:
710
714
  if not _setup_span(span, input_data):
711
715
  return await executor()
712
716
 
@@ -732,7 +736,8 @@ def WithTracing(
732
736
  # Get tracer after initialization (lazy)
733
737
  tracer = get_aiqa_tracer()
734
738
  # Create span but don't use 'with' - span will be closed by TracedGenerator
735
- span = tracer.start_span(fn_name)
739
+ span_kw = {"context": otel_context.Context()} if root else {}
740
+ span = tracer.start_span(fn_name, **span_kw)
736
741
  token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
737
742
 
738
743
  try:
@@ -762,7 +767,8 @@ def WithTracing(
762
767
  # Get tracer after initialization (lazy)
763
768
  tracer = get_aiqa_tracer()
764
769
  # Create span but don't use 'with' - span will be closed by TracedAsyncGenerator
765
- span = tracer.start_span(fn_name)
770
+ span_kw = {"context": otel_context.Context()} if root else {}
771
+ span = tracer.start_span(fn_name, **span_kw)
766
772
  token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
767
773
 
768
774
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiqa-client
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: OpenTelemetry-based Python client for tracing functions and sending traces to the AIQA server
5
5
  Author-email: AIQA <info@aiqa.dev>
6
6
  License: MIT
@@ -1,17 +1,17 @@
1
1
  aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
2
2
  aiqa/client.py,sha256=zS9OQQhdvVeIoBz0o8qrz-rjXngEbS9Lrli2ZWNIsrM,15993
3
- aiqa/constants.py,sha256=Z1Z8wzO5_tNGCAvhKtxdCB1zCjQFvL8tctwLkL1og3w,226
4
- aiqa/experiment_runner.py,sha256=c_XxeM3W3EfsK4WoC6BCh6wr0Ph1M-W6Bhi9wkmFSKo,17313
3
+ aiqa/constants.py,sha256=if54R1OD111iPvB53mw0U9NRrBV-zvvm1gOAVxRj-vE,226
4
+ aiqa/experiment_runner.py,sha256=YpUOoBS_3DvT_ipofWe8MnrSjmWz4Bmfe8yaErdprBA,17730
5
5
  aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
6
6
  aiqa/llm_as_judge.py,sha256=ESmqQfaYpypCNfsODkdn5s85n_nzJ4WKbhUMVTb2djE,10087
7
7
  aiqa/object_serialiser.py,sha256=mzd2U_mFcAPalN2m9wxq35-BBeRJOhNK1k0-BmRSfQM,17055
8
8
  aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
10
- aiqa/tracing.py,sha256=yOQoeiJi1VeITnrJL2giz1gMN8xKt-ZR35HeY40pj5U,35281
10
+ aiqa/tracing.py,sha256=XKYUwZUIkybxIkOKMj5xwVV2IwX6QTvRIzsZOv8jUOc,35771
11
11
  aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
12
12
  aiqa/types.py,sha256=Rv27oC1R0P1soJz5wsdwkVW-jfHQEVi4vUhwRJid270,2529
13
- aiqa_client-0.7.0.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
14
- aiqa_client-0.7.0.dist-info/METADATA,sha256=diEKUaiP3xtihOXOsFN6dPozS9EjzqY_NoR5XrijaUY,7705
15
- aiqa_client-0.7.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
- aiqa_client-0.7.0.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
17
- aiqa_client-0.7.0.dist-info/RECORD,,
13
+ aiqa_client-0.7.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
14
+ aiqa_client-0.7.2.dist-info/METADATA,sha256=Q4Wwu_FqNSB7IRdydBcRFDcL2bHNLyStT6DYkc_aS8E,7705
15
+ aiqa_client-0.7.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ aiqa_client-0.7.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
17
+ aiqa_client-0.7.2.dist-info/RECORD,,