aiqa-client 0.6.1__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/client.py +74 -4
- aiqa/constants.py +1 -1
- aiqa/experiment_runner.py +108 -149
- aiqa/llm_as_judge.py +3 -2
- aiqa/object_serialiser.py +5 -2
- aiqa/tracing.py +124 -39
- aiqa/types.py +1 -1
- {aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/METADATA +1 -1
- aiqa_client-0.7.2.dist-info/RECORD +17 -0
- {aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/WHEEL +1 -1
- aiqa_client-0.6.1.dist-info/RECORD +0 -17
- {aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/licenses/LICENSE.txt +0 -0
- {aiqa_client-0.6.1.dist-info → aiqa_client-0.7.2.dist-info}/top_level.txt +0 -0
aiqa/client.py
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
import os
|
|
3
3
|
import logging
|
|
4
4
|
from functools import lru_cache
|
|
5
|
-
from typing import Optional, TYPE_CHECKING, Any, Dict
|
|
5
|
+
from typing import Optional, TYPE_CHECKING, Any, Dict, List
|
|
6
6
|
from opentelemetry import trace
|
|
7
7
|
from opentelemetry.sdk.trace import TracerProvider
|
|
8
|
-
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult
|
|
8
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor, SpanExporter, SpanExportResult
|
|
9
9
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
|
|
10
10
|
from opentelemetry.sdk.trace import ReadableSpan
|
|
11
11
|
from opentelemetry.trace import SpanContext
|
|
@@ -52,6 +52,8 @@ class AIQAClient:
|
|
|
52
52
|
cls._instance._exporter = None # reduce circular import issues by not importing for typecheck here
|
|
53
53
|
cls._instance._enabled: bool = True
|
|
54
54
|
cls._instance._initialized: bool = False
|
|
55
|
+
cls._instance._default_ignore_patterns: List[str] = ["_*"] # Default: filter properties starting with '_'
|
|
56
|
+
cls._instance._ignore_recursive: bool = True # Default: recursive filtering enabled
|
|
55
57
|
return cls._instance
|
|
56
58
|
|
|
57
59
|
@property
|
|
@@ -90,6 +92,76 @@ class AIQAClient:
|
|
|
90
92
|
logger.info(f"AIQA tracing {'enabled' if value else 'disabled'}")
|
|
91
93
|
self._enabled = value
|
|
92
94
|
|
|
95
|
+
@property
|
|
96
|
+
def default_ignore_patterns(self) -> List[str]:
|
|
97
|
+
"""
|
|
98
|
+
Get the default ignore patterns applied to all traced inputs and outputs.
|
|
99
|
+
|
|
100
|
+
Default: ["_*"] (filters properties starting with '_')
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
List of ignore patterns (supports wildcards like "_*")
|
|
104
|
+
"""
|
|
105
|
+
return self._default_ignore_patterns.copy()
|
|
106
|
+
|
|
107
|
+
@default_ignore_patterns.setter
|
|
108
|
+
def default_ignore_patterns(self, value: Optional[List[str]]) -> None:
|
|
109
|
+
"""
|
|
110
|
+
Set the default ignore patterns applied to all traced inputs and outputs.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
value: List of patterns to ignore (e.g., ["_*", "password"]).
|
|
114
|
+
Set to None or [] to disable default ignore patterns.
|
|
115
|
+
Supports wildcards (e.g., "_*" matches "_apple", "_fruit").
|
|
116
|
+
|
|
117
|
+
Example:
|
|
118
|
+
from aiqa import get_aiqa_client
|
|
119
|
+
|
|
120
|
+
client = get_aiqa_client()
|
|
121
|
+
# Add password to default ignore patterns
|
|
122
|
+
client.default_ignore_patterns = ["_*", "password", "api_key"]
|
|
123
|
+
# Disable default ignore patterns
|
|
124
|
+
client.default_ignore_patterns = []
|
|
125
|
+
"""
|
|
126
|
+
if value is None:
|
|
127
|
+
self._default_ignore_patterns = []
|
|
128
|
+
else:
|
|
129
|
+
self._default_ignore_patterns = list(value)
|
|
130
|
+
logger.info(f"Default ignore patterns set to: {self._default_ignore_patterns}")
|
|
131
|
+
|
|
132
|
+
@property
|
|
133
|
+
def ignore_recursive(self) -> bool:
|
|
134
|
+
"""
|
|
135
|
+
Get whether ignore patterns are applied recursively to nested objects.
|
|
136
|
+
|
|
137
|
+
Default: True (recursive filtering enabled)
|
|
138
|
+
|
|
139
|
+
Returns:
|
|
140
|
+
True if recursive filtering is enabled, False otherwise
|
|
141
|
+
"""
|
|
142
|
+
return self._ignore_recursive
|
|
143
|
+
|
|
144
|
+
@ignore_recursive.setter
|
|
145
|
+
def ignore_recursive(self, value: bool) -> None:
|
|
146
|
+
"""
|
|
147
|
+
Set whether ignore patterns are applied recursively to nested objects.
|
|
148
|
+
|
|
149
|
+
When True (default), ignore patterns are applied at all nesting levels.
|
|
150
|
+
When False, ignore patterns are only applied to top-level keys.
|
|
151
|
+
|
|
152
|
+
Args:
|
|
153
|
+
value: True to enable recursive filtering, False to disable
|
|
154
|
+
|
|
155
|
+
Example:
|
|
156
|
+
from aiqa import get_aiqa_client
|
|
157
|
+
|
|
158
|
+
client = get_aiqa_client()
|
|
159
|
+
# Disable recursive filtering (only filter top-level keys)
|
|
160
|
+
client.ignore_recursive = False
|
|
161
|
+
"""
|
|
162
|
+
self._ignore_recursive = bool(value)
|
|
163
|
+
logger.info(f"Ignore recursive filtering {'enabled' if self._ignore_recursive else 'disabled'}")
|
|
164
|
+
|
|
93
165
|
def shutdown(self) -> None:
|
|
94
166
|
"""
|
|
95
167
|
Shutdown the tracer provider and exporter.
|
|
@@ -245,8 +317,6 @@ def _attach_aiqa_processor(provider: TracerProvider) -> None:
|
|
|
245
317
|
auth_headers = {}
|
|
246
318
|
if api_key:
|
|
247
319
|
auth_headers["Authorization"] = f"ApiKey {api_key}"
|
|
248
|
-
elif os.getenv("AIQA_API_KEY"):
|
|
249
|
-
auth_headers["Authorization"] = f"ApiKey {os.getenv('AIQA_API_KEY')}"
|
|
250
320
|
|
|
251
321
|
# OTLP HTTP exporter requires the full endpoint URL including /v1/traces
|
|
252
322
|
# Ensure server_url doesn't have trailing slash or /v1/traces, then append /v1/traces
|
aiqa/constants.py
CHANGED
|
@@ -3,6 +3,6 @@ Constants used across the AIQA client package.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
AIQA_TRACER_NAME = "aiqa-tracer"
|
|
6
|
-
VERSION = "0.
|
|
6
|
+
VERSION = "0.7.2" # automatically updated by set-version-json.sh
|
|
7
7
|
|
|
8
8
|
LOG_TAG = "AIQA" # Used in all logging output to identify AIQA messages
|
aiqa/experiment_runner.py
CHANGED
|
@@ -5,11 +5,16 @@ ExperimentRunner - runs experiments on datasets and scores results
|
|
|
5
5
|
import os
|
|
6
6
|
import time
|
|
7
7
|
import asyncio
|
|
8
|
+
from opentelemetry import context as otel_context
|
|
9
|
+
from opentelemetry.trace import Status, StatusCode, set_span_in_context
|
|
8
10
|
from .constants import LOG_TAG
|
|
9
11
|
from .http_utils import build_headers, get_server_url, get_api_key, format_http_error
|
|
10
12
|
from typing import Any, Dict, List, Optional, Callable, Awaitable, Union
|
|
11
13
|
from .tracing import WithTracing
|
|
12
|
-
from .span_helpers import set_span_attribute, flush_tracing
|
|
14
|
+
from .span_helpers import set_span_attribute, flush_tracing, get_active_trace_id
|
|
15
|
+
from .client import get_aiqa_client, get_aiqa_tracer, get_component_tag
|
|
16
|
+
from .object_serialiser import serialize_for_span
|
|
17
|
+
from .tracing_llm_utils import _extract_and_set_token_usage, _extract_and_set_provider_and_model
|
|
13
18
|
from .llm_as_judge import score_llm_metric_local, get_model_from_server, call_llm_fallback
|
|
14
19
|
import requests
|
|
15
20
|
from .types import MetricResult, ScoreThisInputOutputMetricType, Example, Result, Metric, CallLLMType
|
|
@@ -25,31 +30,9 @@ CallMyCodeType = Callable[[Any, Dict[str, Any]], Union[Any, Awaitable[Any]]]
|
|
|
25
30
|
ScoreThisOutputType = Callable[[Any, Any, Dict[str, Any], Dict[str, Any]], Awaitable[Dict[str, Any]]]
|
|
26
31
|
|
|
27
32
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
""
|
|
31
|
-
if not isinstance(input_data, dict):
|
|
32
|
-
return {}
|
|
33
|
-
self_obj = input_data.get("self")
|
|
34
|
-
if not self_obj:
|
|
35
|
-
return {}
|
|
36
|
-
return {
|
|
37
|
-
"dataset": getattr(self_obj, "dataset_id", None),
|
|
38
|
-
"experiment": getattr(self_obj, "experiment_id", None),
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def _filter_input_for_run_example(
|
|
43
|
-
self: "ExperimentRunner",
|
|
44
|
-
example: Dict[str, Any],
|
|
45
|
-
call_my_code: Any = None,
|
|
46
|
-
score_this_output: Any = None,
|
|
47
|
-
) -> Dict[str, Any]:
|
|
48
|
-
"""Filter input for run_example method to extract dataset, experiment, and example IDs."""
|
|
49
|
-
result = _filter_input_for_run({"self": self})
|
|
50
|
-
if isinstance(example, dict):
|
|
51
|
-
result["example"] = example.get("id")
|
|
52
|
-
return result
|
|
33
|
+
def _metric_score_key(metric: Dict[str, Any]) -> str:
|
|
34
|
+
"""Key for scores in API: server expects metric name (fallback to id)."""
|
|
35
|
+
return (metric.get("name") or metric.get("id")) or ""
|
|
53
36
|
|
|
54
37
|
|
|
55
38
|
class ExperimentRunner:
|
|
@@ -123,7 +106,17 @@ class ExperimentRunner:
|
|
|
123
106
|
|
|
124
107
|
return dataset
|
|
125
108
|
|
|
126
|
-
def
|
|
109
|
+
def get_example(self, example_id: str) -> Dict[str, Any]:
|
|
110
|
+
"""
|
|
111
|
+
Fetch an example by ID.
|
|
112
|
+
"""
|
|
113
|
+
response = requests.get(
|
|
114
|
+
f"{self.server_url}/example/{example_id}",
|
|
115
|
+
headers=self._get_headers(),
|
|
116
|
+
)
|
|
117
|
+
return response.json()
|
|
118
|
+
|
|
119
|
+
def get_examples_for_dataset(self, limit: int = 10000) -> List[Dict[str, Any]]:
|
|
127
120
|
"""
|
|
128
121
|
Fetch example inputs from the dataset.
|
|
129
122
|
|
|
@@ -134,7 +127,7 @@ class ExperimentRunner:
|
|
|
134
127
|
List of example objects
|
|
135
128
|
"""
|
|
136
129
|
params = {
|
|
137
|
-
"
|
|
130
|
+
"dataset": self.dataset_id,
|
|
138
131
|
"limit": str(limit),
|
|
139
132
|
}
|
|
140
133
|
if self.organisation:
|
|
@@ -162,7 +155,6 @@ class ExperimentRunner:
|
|
|
162
155
|
experiment_setup: Optional setup for the experiment object. You may wish to set:
|
|
163
156
|
- name (recommended for labelling the experiment)
|
|
164
157
|
- parameters
|
|
165
|
-
- comparison_parameters
|
|
166
158
|
|
|
167
159
|
Returns:
|
|
168
160
|
The created experiment object
|
|
@@ -184,7 +176,7 @@ class ExperimentRunner:
|
|
|
184
176
|
"organisation": self.organisation,
|
|
185
177
|
"dataset": self.dataset_id,
|
|
186
178
|
"results": [],
|
|
187
|
-
"
|
|
179
|
+
"summaries": {},
|
|
188
180
|
}
|
|
189
181
|
|
|
190
182
|
print(f"Creating experiment")
|
|
@@ -207,6 +199,7 @@ class ExperimentRunner:
|
|
|
207
199
|
example: Example,
|
|
208
200
|
output: Any,
|
|
209
201
|
result: Result,
|
|
202
|
+
trace_id: Optional[str] = None,
|
|
210
203
|
) -> Result:
|
|
211
204
|
"""
|
|
212
205
|
Ask the server to score an example result. Stores the score for later summary calculation.
|
|
@@ -226,24 +219,20 @@ class ExperimentRunner:
|
|
|
226
219
|
if not example_id:
|
|
227
220
|
raise ValueError("Example must have an 'id' field")
|
|
228
221
|
if result is None:
|
|
229
|
-
|
|
230
|
-
if not example_id:
|
|
231
|
-
raise ValueError("Example must have an 'id' field")
|
|
232
|
-
result = Result(exampleId=example_id, scores={}, messages={}, errors={})
|
|
222
|
+
result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
|
|
233
223
|
scores = result.get("scores") or {}
|
|
234
|
-
|
|
235
|
-
|
|
236
224
|
|
|
237
225
|
print(f"Scoring and storing example: {example_id}")
|
|
238
226
|
print(f"Scores: {scores}")
|
|
239
227
|
|
|
240
228
|
# Run synchronous requests.post in a thread pool to avoid blocking
|
|
229
|
+
# Server expects output = raw output to score, not the result dict; scores keyed by metric name
|
|
241
230
|
def _do_request():
|
|
242
231
|
return requests.post(
|
|
243
232
|
f"{self.server_url}/experiment/{self.experiment_id}/example/{example_id}/scoreAndStore",
|
|
244
233
|
json={
|
|
245
|
-
"output":
|
|
246
|
-
"
|
|
234
|
+
"output": output,
|
|
235
|
+
"trace": trace_id,
|
|
247
236
|
"scores": scores,
|
|
248
237
|
},
|
|
249
238
|
headers=self._get_headers(),
|
|
@@ -258,7 +247,6 @@ class ExperimentRunner:
|
|
|
258
247
|
print(f"scoreAndStore response: {json_result}")
|
|
259
248
|
return json_result
|
|
260
249
|
|
|
261
|
-
@WithTracing(filter_input=_filter_input_for_run)
|
|
262
250
|
async def run(
|
|
263
251
|
self,
|
|
264
252
|
call_my_code: CallMyCodeType,
|
|
@@ -271,19 +259,11 @@ class ExperimentRunner:
|
|
|
271
259
|
engine: Function that takes input, returns output (can be async)
|
|
272
260
|
scorer: Optional function that scores the output given the example
|
|
273
261
|
"""
|
|
274
|
-
examples = self.
|
|
275
|
-
|
|
276
|
-
# Wrap engine to match run_example signature (input, parameters)
|
|
277
|
-
async def wrapped_engine(input_data, parameters):
|
|
278
|
-
result = call_my_code(input_data, parameters)
|
|
279
|
-
# Handle async functions
|
|
280
|
-
if hasattr(result, "__await__"):
|
|
281
|
-
result = await result
|
|
282
|
-
return result
|
|
262
|
+
examples = self.get_examples_for_dataset()
|
|
283
263
|
|
|
284
264
|
for example in examples:
|
|
285
265
|
try:
|
|
286
|
-
scores = await self.run_example(example,
|
|
266
|
+
scores = await self.run_example(example, call_my_code, scorer_for_metric_id)
|
|
287
267
|
if scores:
|
|
288
268
|
self.scores.append(
|
|
289
269
|
{
|
|
@@ -296,7 +276,6 @@ class ExperimentRunner:
|
|
|
296
276
|
print(f"Error processing example {example.get('id', 'unknown')}: {e}")
|
|
297
277
|
# Continue with next example instead of failing entire run
|
|
298
278
|
|
|
299
|
-
@WithTracing(filter_input=_filter_input_for_run_example)
|
|
300
279
|
async def run_example(
|
|
301
280
|
self,
|
|
302
281
|
example: Example,
|
|
@@ -304,8 +283,10 @@ class ExperimentRunner:
|
|
|
304
283
|
scorer_for_metric_id: Optional[Dict[str, ScoreThisInputOutputMetricType]] = None,
|
|
305
284
|
) -> List[Result]:
|
|
306
285
|
"""
|
|
307
|
-
Run the engine on an example with the
|
|
308
|
-
|
|
286
|
+
Run the engine on an example with the experiment's parameters, score the result, and store it.
|
|
287
|
+
|
|
288
|
+
Spans: one root "RunExample" span (input, call_my_code, output) and one child "ScoreExample"
|
|
289
|
+
span for scoring, so the server sees a clear call_my_code vs scoring split (aligned with client-go).
|
|
309
290
|
|
|
310
291
|
Args:
|
|
311
292
|
example: The example to run. See Example.ts type
|
|
@@ -313,117 +294,94 @@ class ExperimentRunner:
|
|
|
313
294
|
scorer_for_metric_id: Optional dictionary of metric IDs to functions that score the output given the example and parameters
|
|
314
295
|
|
|
315
296
|
Returns:
|
|
316
|
-
|
|
317
|
-
returns an array of one.
|
|
297
|
+
List of one result (for API compatibility).
|
|
318
298
|
"""
|
|
319
|
-
# Ensure experiment exists
|
|
320
299
|
if not self.experiment:
|
|
321
300
|
self.create_experiment()
|
|
322
301
|
if not self.experiment:
|
|
323
302
|
raise Exception("Failed to create experiment")
|
|
324
303
|
|
|
325
|
-
|
|
326
|
-
parameters_fixed = self.experiment.get("parameters") or {}
|
|
327
|
-
# If comparison_parameters is empty/undefined, default to [{}] so we run at least once
|
|
328
|
-
parameters_loop = self.experiment.get("comparison_parameters") or [{}]
|
|
329
|
-
|
|
330
|
-
# Handle both spans array and input field
|
|
304
|
+
parameters_here = self.experiment.get("parameters") or {}
|
|
331
305
|
input_data = example.get("input")
|
|
332
306
|
if not input_data and example.get("spans") and len(example["spans"]) > 0:
|
|
333
307
|
input_data = example["spans"][0].get("attributes", {}).get("input")
|
|
334
|
-
|
|
335
308
|
if not input_data:
|
|
336
|
-
print(f"Warning: Example has no input field or spans with input attribute: {example}"
|
|
337
|
-
)
|
|
338
|
-
# Run engine anyway -- this could make sense if it's all about the parameters
|
|
309
|
+
print(f"Warning: Example has no input field or spans with input attribute: {example}")
|
|
339
310
|
|
|
340
|
-
# Set example.id on the root span (created by @WithTracing decorator)
|
|
341
|
-
# This ensures the root span from the trace has example=Example.id set
|
|
342
311
|
example_id = example.get("id")
|
|
343
312
|
if not example_id:
|
|
344
313
|
raise ValueError("Example must have an 'id' field")
|
|
345
|
-
set_span_attribute("example", example_id)
|
|
346
|
-
|
|
347
|
-
all_scores: List[Dict[str, Any]] = []
|
|
348
|
-
dataset_metrics = self.get_dataset().get("metrics", [])
|
|
349
|
-
specific_metrics = example.get("metrics", [])
|
|
350
|
-
metrics = [*dataset_metrics, *specific_metrics]
|
|
351
|
-
# This loop should not be parallelized - it should run sequentially, one after the other
|
|
352
|
-
# to avoid creating interference between the runs.
|
|
353
|
-
for parameters in parameters_loop:
|
|
354
|
-
parameters_here = {**parameters_fixed, **parameters}
|
|
355
|
-
print(f"Running with parameters: {parameters_here}")
|
|
356
|
-
|
|
357
|
-
# Save original env var values for cleanup
|
|
358
|
-
original_env_vars: Dict[str, Optional[str]] = {}
|
|
359
|
-
# Set env vars from parameters_here
|
|
360
|
-
for key, value in parameters_here.items():
|
|
361
|
-
if value:
|
|
362
|
-
original_env_vars[key] = os.environ.get(key)
|
|
363
|
-
os.environ[key] = str(value)
|
|
364
314
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
315
|
+
print(f"Running with parameters: {parameters_here}")
|
|
316
|
+
original_env_vars: Dict[str, Optional[str]] = {}
|
|
317
|
+
for key, value in parameters_here.items():
|
|
318
|
+
if value:
|
|
319
|
+
original_env_vars[key] = os.environ.get(key)
|
|
320
|
+
os.environ[key] = str(value)
|
|
321
|
+
try:
|
|
322
|
+
start = time.time() * 1000
|
|
323
|
+
|
|
324
|
+
run_trace_id_ref: List[Optional[str]] = [None]
|
|
325
|
+
|
|
326
|
+
# Wrap engine to match run_example signature (input, parameters)
|
|
327
|
+
# Root span so server can find it by parent:unset; trace ID is sent to scoreAndStore
|
|
328
|
+
def set_trace_id(tid: Optional[str]) -> None:
|
|
329
|
+
run_trace_id_ref[0] = tid
|
|
330
|
+
|
|
331
|
+
@WithTracing(root=True)
|
|
332
|
+
async def wrapped_engine(input_data, parameters, set_trace_id: Callable[[Optional[str]], None]):
|
|
333
|
+
trace_id_here = get_active_trace_id()
|
|
334
|
+
set_trace_id(trace_id_here)
|
|
335
|
+
result = call_my_code(input_data, parameters)
|
|
368
336
|
# Handle async functions
|
|
369
|
-
if hasattr(
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
if original_value is None:
|
|
416
|
-
# Variable didn't exist before, remove it
|
|
417
|
-
os.environ.pop(key, None)
|
|
418
|
-
else:
|
|
419
|
-
# Restore original value
|
|
420
|
-
os.environ[key] = original_value
|
|
421
|
-
|
|
422
|
-
return all_scores
|
|
423
|
-
|
|
424
|
-
def get_summary_results(self) -> Dict[str, Any]:
|
|
337
|
+
if hasattr(result, "__await__"):
|
|
338
|
+
result = await result
|
|
339
|
+
return result
|
|
340
|
+
|
|
341
|
+
output = wrapped_engine(input_data, parameters_here, set_trace_id)
|
|
342
|
+
if hasattr(output, "__await__"):
|
|
343
|
+
output = await output
|
|
344
|
+
duration = int((time.time() * 1000) - start)
|
|
345
|
+
print(f"Output: {output}")
|
|
346
|
+
|
|
347
|
+
dataset_metrics = self.get_dataset().get("metrics", [])
|
|
348
|
+
specific_metrics = example.get("metrics", [])
|
|
349
|
+
metrics = [*dataset_metrics, *specific_metrics]
|
|
350
|
+
result: Result = {"example": example_id, "scores": {}, "messages": {}, "errors": {}}
|
|
351
|
+
for metric in metrics:
|
|
352
|
+
metric_id = metric.get("id")
|
|
353
|
+
score_key = _metric_score_key(metric)
|
|
354
|
+
if not metric_id or not score_key:
|
|
355
|
+
continue
|
|
356
|
+
scorer = scorer_for_metric_id.get(metric_id) if scorer_for_metric_id else None
|
|
357
|
+
if scorer:
|
|
358
|
+
metric_result = await scorer(input_data, output, metric)
|
|
359
|
+
elif metric.get("type") == "llm":
|
|
360
|
+
metric_result = await self._score_llm_metric(input_data, output, example, metric)
|
|
361
|
+
else:
|
|
362
|
+
continue
|
|
363
|
+
if not metric_result:
|
|
364
|
+
result["errors"][score_key] = "Scoring function returned None"
|
|
365
|
+
continue
|
|
366
|
+
result["scores"][score_key] = metric_result.get("score")
|
|
367
|
+
result["messages"][score_key] = metric_result.get("message")
|
|
368
|
+
result["errors"][score_key] = metric_result.get("error")
|
|
369
|
+
result["scores"]["duration"] = duration
|
|
370
|
+
await flush_tracing()
|
|
371
|
+
print(f"Call scoreAndStore ... for example: {example_id} with scores: {result['scores']}")
|
|
372
|
+
result = await self.score_and_store(example, output, result, trace_id=run_trace_id_ref[0])
|
|
373
|
+
print(f"scoreAndStore returned: {result}")
|
|
374
|
+
return [result]
|
|
375
|
+
finally:
|
|
376
|
+
for key, original_value in original_env_vars.items():
|
|
377
|
+
if original_value is None:
|
|
378
|
+
os.environ.pop(key, None)
|
|
379
|
+
else:
|
|
380
|
+
os.environ[key] = original_value
|
|
381
|
+
|
|
382
|
+
def get_summaries(self) -> Dict[str, Any]:
|
|
425
383
|
"""
|
|
426
|
-
Get
|
|
384
|
+
Get summaries from the experiment.
|
|
427
385
|
|
|
428
386
|
Returns:
|
|
429
387
|
Dictionary of metric names to summary statistics
|
|
@@ -435,12 +393,12 @@ class ExperimentRunner:
|
|
|
435
393
|
f"{self.server_url}/experiment/{self.experiment_id}",
|
|
436
394
|
headers=self._get_headers(),
|
|
437
395
|
)
|
|
438
|
-
|
|
396
|
+
|
|
439
397
|
if not response.ok:
|
|
440
398
|
raise Exception(format_http_error(response, "fetch summary results"))
|
|
441
399
|
|
|
442
400
|
experiment2 = response.json()
|
|
443
|
-
return experiment2.get("
|
|
401
|
+
return experiment2.get("summaries", {})
|
|
444
402
|
|
|
445
403
|
async def _score_llm_metric(
|
|
446
404
|
self,
|
|
@@ -471,7 +429,8 @@ class ExperimentRunner:
|
|
|
471
429
|
model_id, self.server_url, self._get_headers()
|
|
472
430
|
)
|
|
473
431
|
if model_data:
|
|
474
|
-
|
|
432
|
+
# Server returns 'apiKey' (camelCase)
|
|
433
|
+
api_key = model_data.get("apiKey")
|
|
475
434
|
# If provider not set in metric, try to get it from model
|
|
476
435
|
if not provider and model_data.get("provider"):
|
|
477
436
|
provider = model_data.get("provider")
|
aiqa/llm_as_judge.py
CHANGED
|
@@ -52,14 +52,15 @@ async def get_model_from_server(
|
|
|
52
52
|
try:
|
|
53
53
|
def _do_request():
|
|
54
54
|
return requests.get(
|
|
55
|
-
f"{server_url}/model/{model_id}?fields=
|
|
55
|
+
f"{server_url}/model/{model_id}?fields=apiKey", # Server uses camelCase 'apiKey' (also accepts 'api_key')
|
|
56
56
|
headers=headers,
|
|
57
57
|
)
|
|
58
58
|
|
|
59
59
|
response = await asyncio.to_thread(_do_request)
|
|
60
60
|
if response.ok:
|
|
61
61
|
model = response.json()
|
|
62
|
-
|
|
62
|
+
# Server returns 'apiKey' (camelCase)
|
|
63
|
+
if model.get("apiKey"):
|
|
63
64
|
return model
|
|
64
65
|
return None
|
|
65
66
|
except Exception as e:
|
aiqa/object_serialiser.py
CHANGED
|
@@ -25,7 +25,7 @@ def sanitize_string_for_utf8(text: str) -> str:
|
|
|
25
25
|
Returns:
|
|
26
26
|
A string with surrogate characters replaced by the Unicode replacement character (U+FFFD)
|
|
27
27
|
"""
|
|
28
|
-
if text
|
|
28
|
+
if text is None:
|
|
29
29
|
return None
|
|
30
30
|
if not isinstance(text, str): # paranoia
|
|
31
31
|
text = str(text)
|
|
@@ -43,7 +43,10 @@ def toNumber(value: str|int|None) -> int:
|
|
|
43
43
|
if value is None:
|
|
44
44
|
return 0
|
|
45
45
|
if isinstance(value, int):
|
|
46
|
-
return value
|
|
46
|
+
return value
|
|
47
|
+
# Convert to string if not already
|
|
48
|
+
if not isinstance(value, str):
|
|
49
|
+
value = str(value)
|
|
47
50
|
if value.endswith("b"): # drop the b
|
|
48
51
|
value = value[:-1]
|
|
49
52
|
if value.endswith("g"):
|
aiqa/tracing.py
CHANGED
|
@@ -7,7 +7,7 @@ import inspect
|
|
|
7
7
|
import fnmatch
|
|
8
8
|
from typing import Any, Callable, Optional, List
|
|
9
9
|
from functools import wraps
|
|
10
|
-
from opentelemetry import trace
|
|
10
|
+
from opentelemetry import context as otel_context, trace
|
|
11
11
|
from opentelemetry.trace import Status, StatusCode
|
|
12
12
|
|
|
13
13
|
from .client import get_aiqa_client, get_component_tag, get_aiqa_tracer
|
|
@@ -47,16 +47,16 @@ class TracingOptions:
|
|
|
47
47
|
|
|
48
48
|
ignore_input: Iterable of keys (e.g., list, set) to exclude from
|
|
49
49
|
input data when recording span attributes. Applies after filter_input if both are set.
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
or `["_*", "password"]` to exclude sensitive fields from being traced.
|
|
50
|
+
Supports "self" and simple wildcards (e.g., `"_*"`
|
|
51
|
+
matches `"_apple"`, `"_fruit"`). The pattern `"_*"` is applied by default
|
|
52
|
+
to filter properties starting with '_' in nested objects.
|
|
54
53
|
|
|
55
54
|
ignore_output: Iterable of keys (e.g., list, set) to exclude from
|
|
56
55
|
output data when recording span attributes. Only applies when
|
|
57
56
|
output is a dictionary. Supports simple wildcards (e.g., `"_*"`
|
|
58
|
-
matches `"_apple"`, `"_fruit"`).
|
|
59
|
-
|
|
57
|
+
matches `"_apple"`, `"_fruit"`). The pattern `"_*"` is applied by default
|
|
58
|
+
to filter properties starting with '_' in nested objects. Useful for excluding
|
|
59
|
+
large or sensitive fields from traces.
|
|
60
60
|
|
|
61
61
|
filter_input: Callable function that receives the same arguments as the
|
|
62
62
|
decorated function (*args, **kwargs) and returns a filtered/transformed
|
|
@@ -96,7 +96,7 @@ class TracingOptions:
|
|
|
96
96
|
filter_input=lambda self, example: {
|
|
97
97
|
"dataset": self.dataset_id,
|
|
98
98
|
"experiment": self.experiment_id,
|
|
99
|
-
"
|
|
99
|
+
"example": example.id if hasattr(example, 'id') else None
|
|
100
100
|
}
|
|
101
101
|
)
|
|
102
102
|
def run_example(self, example):
|
|
@@ -168,33 +168,89 @@ def _prepare_input(args: tuple, kwargs: dict, sig: Optional[inspect.Signature] =
|
|
|
168
168
|
return result
|
|
169
169
|
|
|
170
170
|
|
|
171
|
-
def _apply_ignore_patterns(
|
|
171
|
+
def _apply_ignore_patterns(
|
|
172
|
+
data_dict: dict,
|
|
173
|
+
ignore_patterns: Optional[List[str]],
|
|
174
|
+
recursive: bool = True,
|
|
175
|
+
max_depth: int = 100,
|
|
176
|
+
current_depth: int = 0
|
|
177
|
+
) -> dict:
|
|
172
178
|
"""
|
|
173
|
-
Apply ignore patterns to a dict.
|
|
179
|
+
Apply ignore patterns to a dict, optionally recursively.
|
|
174
180
|
Supports string keys, wildcard patterns (*), and list of patterns.
|
|
175
181
|
Used for both ignore_input and ignore_output.
|
|
176
182
|
|
|
177
183
|
Args:
|
|
178
|
-
data_dict: Dictionary to filter
|
|
184
|
+
data_dict: Dictionary to filter (may contain nested dictionaries)
|
|
179
185
|
ignore_patterns: List of patterns to exclude (e.g., ["self", "_*", "password"])
|
|
186
|
+
recursive: Whether to apply patterns recursively to nested dictionaries
|
|
187
|
+
max_depth: Maximum recursion depth to prevent infinite loops (default: 100)
|
|
188
|
+
current_depth: Current recursion depth (internal use)
|
|
180
189
|
|
|
181
190
|
Returns:
|
|
182
191
|
Filtered dictionary with matching keys removed
|
|
183
192
|
"""
|
|
184
|
-
if not
|
|
193
|
+
if not isinstance(data_dict, dict):
|
|
185
194
|
return data_dict
|
|
186
195
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
196
|
+
# Safety check: prevent infinite loops from extremely deep nesting
|
|
197
|
+
if current_depth >= max_depth:
|
|
198
|
+
logger.warning(
|
|
199
|
+
f"_apply_ignore_patterns: max depth {max_depth} reached, "
|
|
200
|
+
f"stopping recursion to prevent infinite loop"
|
|
201
|
+
)
|
|
202
|
+
return data_dict
|
|
203
|
+
|
|
204
|
+
# If no patterns, return copy (no filtering needed, even if recursive=True)
|
|
205
|
+
if not ignore_patterns:
|
|
206
|
+
return data_dict.copy()
|
|
207
|
+
|
|
208
|
+
result = {}
|
|
209
|
+
for key, value in data_dict.items():
|
|
210
|
+
# Skip keys that match ignore patterns
|
|
211
|
+
if _matches_ignore_pattern(key, ignore_patterns):
|
|
212
|
+
continue
|
|
213
|
+
|
|
214
|
+
# Recursively process nested dictionaries if recursive=True
|
|
215
|
+
if recursive and isinstance(value, dict):
|
|
216
|
+
result[key] = _apply_ignore_patterns(
|
|
217
|
+
value, ignore_patterns, recursive, max_depth, current_depth + 1
|
|
218
|
+
)
|
|
219
|
+
else:
|
|
220
|
+
result[key] = value
|
|
194
221
|
|
|
195
222
|
return result
|
|
196
223
|
|
|
197
224
|
|
|
225
|
+
def _merge_with_default_ignore_patterns(
|
|
226
|
+
ignore_patterns: Optional[List[str]],
|
|
227
|
+
client: Optional[Any] = None
|
|
228
|
+
) -> List[str]:
|
|
229
|
+
"""
|
|
230
|
+
Merge user-provided ignore patterns with client's default ignore patterns.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
ignore_patterns: Optional list of user-provided patterns
|
|
234
|
+
client: Optional client instance (to avoid repeated get_aiqa_client() calls)
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
List of patterns including client's default ignore patterns
|
|
238
|
+
"""
|
|
239
|
+
if client is None:
|
|
240
|
+
client = get_aiqa_client()
|
|
241
|
+
default_patterns = client.default_ignore_patterns
|
|
242
|
+
|
|
243
|
+
if ignore_patterns is None:
|
|
244
|
+
return default_patterns.copy() if default_patterns else []
|
|
245
|
+
|
|
246
|
+
# Merge patterns, avoiding duplicates
|
|
247
|
+
merged = list(default_patterns)
|
|
248
|
+
for pattern in ignore_patterns:
|
|
249
|
+
if pattern not in merged:
|
|
250
|
+
merged.append(pattern)
|
|
251
|
+
return merged
|
|
252
|
+
|
|
253
|
+
|
|
198
254
|
def _prepare_and_filter_input(
|
|
199
255
|
args: tuple,
|
|
200
256
|
kwargs: dict,
|
|
@@ -209,6 +265,7 @@ def _prepare_and_filter_input(
|
|
|
209
265
|
1. Apply filter_input to args, kwargs (receives same inputs as decorated function, including self)
|
|
210
266
|
2. Convert into dict ready for span.attributes.input
|
|
211
267
|
3. Apply ignore_input to the dict (supports string, wildcard, and list patterns)
|
|
268
|
+
Client's default ignore patterns are automatically merged with ignore_input.
|
|
212
269
|
|
|
213
270
|
Args:
|
|
214
271
|
args: Positional arguments (including self for bound methods)
|
|
@@ -218,7 +275,7 @@ def _prepare_and_filter_input(
|
|
|
218
275
|
including `self` for bound methods. This allows extracting properties from any object.
|
|
219
276
|
ignore_input: Optional list of keys/patterns to exclude from the final dict.
|
|
220
277
|
If "self" is in ignore_input, it will be removed from the final dict but filter_input
|
|
221
|
-
still receives it.
|
|
278
|
+
still receives it. Client's default ignore patterns are automatically merged.
|
|
222
279
|
sig: Optional function signature for proper arg name resolution
|
|
223
280
|
|
|
224
281
|
Returns:
|
|
@@ -251,15 +308,23 @@ def _prepare_and_filter_input(
|
|
|
251
308
|
input_data = _prepare_input(args, kwargs, sig)
|
|
252
309
|
|
|
253
310
|
# Step 3: Apply ignore_input to the dict (removes "self" from final dict if specified)
|
|
254
|
-
|
|
311
|
+
# Merge with client's default ignore patterns
|
|
312
|
+
client = get_aiqa_client()
|
|
313
|
+
merged_ignore_input = _merge_with_default_ignore_patterns(ignore_input, client)
|
|
314
|
+
should_ignore_self = "self" in merged_ignore_input
|
|
315
|
+
|
|
255
316
|
if isinstance(input_data, dict):
|
|
256
|
-
input_data = _apply_ignore_patterns(
|
|
317
|
+
input_data = _apply_ignore_patterns(
|
|
318
|
+
input_data,
|
|
319
|
+
merged_ignore_input,
|
|
320
|
+
recursive=client.ignore_recursive
|
|
321
|
+
)
|
|
257
322
|
# Handle case where we removed self and there are no remaining args/kwargs
|
|
258
323
|
if should_ignore_self and not input_data:
|
|
259
324
|
return None
|
|
260
|
-
elif
|
|
261
|
-
# Warn if
|
|
262
|
-
logger.warning(f"_prepare_and_filter_input: skip:
|
|
325
|
+
elif merged_ignore_input:
|
|
326
|
+
# Warn if ignore patterns are set but input_data is not a dict
|
|
327
|
+
logger.warning(f"_prepare_and_filter_input: skip: ignore patterns are set but input_data is not a dict: {type(input_data)}")
|
|
263
328
|
|
|
264
329
|
return input_data
|
|
265
330
|
|
|
@@ -269,7 +334,10 @@ def _filter_and_serialize_output(
|
|
|
269
334
|
filter_output: Optional[Callable[[Any], Any]],
|
|
270
335
|
ignore_output: Optional[List[str]],
|
|
271
336
|
) -> Any:
|
|
272
|
-
"""
|
|
337
|
+
"""
|
|
338
|
+
Filter and serialize output for span attributes.
|
|
339
|
+
Client's default ignore patterns are automatically merged with ignore_output.
|
|
340
|
+
"""
|
|
273
341
|
output_data = result
|
|
274
342
|
if filter_output:
|
|
275
343
|
if isinstance(output_data, dict):
|
|
@@ -277,11 +345,19 @@ def _filter_and_serialize_output(
|
|
|
277
345
|
output_data = filter_output(output_data)
|
|
278
346
|
|
|
279
347
|
# Apply ignore_output patterns (supports key, wildcard, and list patterns)
|
|
348
|
+
# Merge with client's default ignore patterns
|
|
349
|
+
client = get_aiqa_client()
|
|
350
|
+
merged_ignore_output = _merge_with_default_ignore_patterns(ignore_output, client)
|
|
351
|
+
|
|
280
352
|
if isinstance(output_data, dict):
|
|
281
|
-
output_data = _apply_ignore_patterns(
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
353
|
+
output_data = _apply_ignore_patterns(
|
|
354
|
+
output_data,
|
|
355
|
+
merged_ignore_output,
|
|
356
|
+
recursive=client.ignore_recursive
|
|
357
|
+
)
|
|
358
|
+
elif merged_ignore_output:
|
|
359
|
+
# Warn if ignore patterns are set but output_data is not a dict
|
|
360
|
+
logger.warning(f"_filter_and_serialize_output: skip: ignore patterns are set but output_data is not a dict: {type(output_data)}")
|
|
285
361
|
|
|
286
362
|
# Serialize immediately to create immutable result (removes mutable structures)
|
|
287
363
|
return serialize_for_span(output_data)
|
|
@@ -487,6 +563,7 @@ def WithTracing(
|
|
|
487
563
|
ignore_output: Optional[List[str]] = None,
|
|
488
564
|
filter_input: Optional[Callable[[Any], Any]] = None,
|
|
489
565
|
filter_output: Optional[Callable[[Any], Any]] = None,
|
|
566
|
+
root: bool = False,
|
|
490
567
|
):
|
|
491
568
|
"""
|
|
492
569
|
Decorator to automatically create spans for function calls.
|
|
@@ -500,12 +577,14 @@ def WithTracing(
|
|
|
500
577
|
ignore_input: List of keys to exclude from input data when recording span attributes.
|
|
501
578
|
self is handled as "self"
|
|
502
579
|
Supports simple wildcards (e.g., "_*"
|
|
503
|
-
matches "_apple", "_fruit").
|
|
504
|
-
|
|
580
|
+
matches "_apple", "_fruit"). The pattern "_*" is applied by default
|
|
581
|
+
to filter properties starting with '_' in nested objects. For example, use
|
|
582
|
+
["password", "api_key"] to exclude additional sensitive fields from being traced.
|
|
505
583
|
ignore_output: List of keys to exclude from output data when recording span attributes.
|
|
506
584
|
Only applies when output is a dictionary. Supports simple wildcards (e.g., "_*"
|
|
507
|
-
matches "_apple", "_fruit").
|
|
508
|
-
|
|
585
|
+
matches "_apple", "_fruit"). The pattern "_*" is applied by default
|
|
586
|
+
to filter properties starting with '_' in nested objects. Useful for excluding
|
|
587
|
+
large or sensitive fields from traces.
|
|
509
588
|
filter_input: Function to filter/transform input before recording.
|
|
510
589
|
Receives the same arguments as the decorated function (*args, **kwargs),
|
|
511
590
|
including `self` for bound methods. This allows you to extract specific
|
|
@@ -514,6 +593,7 @@ def WithTracing(
|
|
|
514
593
|
Returns a dict or any value (will be converted to dict). Applied before ignore_input.
|
|
515
594
|
filter_output: Function to filter/transform output before recording.
|
|
516
595
|
Receives the output value and returns a filtered/transformed version.
|
|
596
|
+
root: Whether this is a root span. If True, the span will not be linked to any parent spans.
|
|
517
597
|
|
|
518
598
|
Example:
|
|
519
599
|
@WithTracing
|
|
@@ -607,7 +687,8 @@ def WithTracing(
|
|
|
607
687
|
return executor()
|
|
608
688
|
# Get tracer after initialization (lazy)
|
|
609
689
|
tracer = get_aiqa_tracer()
|
|
610
|
-
|
|
690
|
+
span_kw = {"context": otel_context.Context()} if root else {}
|
|
691
|
+
with tracer.start_as_current_span(fn_name, **span_kw) as span:
|
|
611
692
|
if not _setup_span(span, input_data):
|
|
612
693
|
return executor() # span is not recording, so just execute the function and return the result
|
|
613
694
|
try:
|
|
@@ -628,7 +709,8 @@ def WithTracing(
|
|
|
628
709
|
|
|
629
710
|
# Get tracer after initialization (lazy)
|
|
630
711
|
tracer = get_aiqa_tracer()
|
|
631
|
-
|
|
712
|
+
span_kw = {"context": otel_context.Context()} if root else {}
|
|
713
|
+
with tracer.start_as_current_span(fn_name, **span_kw) as span:
|
|
632
714
|
if not _setup_span(span, input_data):
|
|
633
715
|
return await executor()
|
|
634
716
|
|
|
@@ -654,7 +736,8 @@ def WithTracing(
|
|
|
654
736
|
# Get tracer after initialization (lazy)
|
|
655
737
|
tracer = get_aiqa_tracer()
|
|
656
738
|
# Create span but don't use 'with' - span will be closed by TracedGenerator
|
|
657
|
-
|
|
739
|
+
span_kw = {"context": otel_context.Context()} if root else {}
|
|
740
|
+
span = tracer.start_span(fn_name, **span_kw)
|
|
658
741
|
token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
|
|
659
742
|
|
|
660
743
|
try:
|
|
@@ -678,12 +761,14 @@ def WithTracing(
|
|
|
678
761
|
# This is called lazily when the function runs, not at decorator definition time
|
|
679
762
|
client = get_aiqa_client()
|
|
680
763
|
if not client.enabled:
|
|
681
|
-
|
|
764
|
+
# executor() returns an async generator object, not a coroutine, so don't await it
|
|
765
|
+
return executor()
|
|
682
766
|
|
|
683
767
|
# Get tracer after initialization (lazy)
|
|
684
768
|
tracer = get_aiqa_tracer()
|
|
685
769
|
# Create span but don't use 'with' - span will be closed by TracedAsyncGenerator
|
|
686
|
-
|
|
770
|
+
span_kw = {"context": otel_context.Context()} if root else {}
|
|
771
|
+
span = tracer.start_span(fn_name, **span_kw)
|
|
687
772
|
token = trace.context_api.attach(trace.context_api.set_span_in_context(span))
|
|
688
773
|
|
|
689
774
|
try:
|
aiqa/types.py
CHANGED
|
@@ -29,7 +29,7 @@ class MetricResult(TypedDict):
|
|
|
29
29
|
|
|
30
30
|
class Result(TypedDict):
|
|
31
31
|
"""Result of evaluating a set of metrics on an output (i.e. the full set of metrics for a single example)."""
|
|
32
|
-
|
|
32
|
+
example: str
|
|
33
33
|
scores: Dict[str, Number]
|
|
34
34
|
messages: Optional[Dict[str, str]] = None
|
|
35
35
|
errors: Optional[Dict[str, str]] = None
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
|
|
2
|
+
aiqa/client.py,sha256=zS9OQQhdvVeIoBz0o8qrz-rjXngEbS9Lrli2ZWNIsrM,15993
|
|
3
|
+
aiqa/constants.py,sha256=if54R1OD111iPvB53mw0U9NRrBV-zvvm1gOAVxRj-vE,226
|
|
4
|
+
aiqa/experiment_runner.py,sha256=YpUOoBS_3DvT_ipofWe8MnrSjmWz4Bmfe8yaErdprBA,17730
|
|
5
|
+
aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
|
|
6
|
+
aiqa/llm_as_judge.py,sha256=ESmqQfaYpypCNfsODkdn5s85n_nzJ4WKbhUMVTb2djE,10087
|
|
7
|
+
aiqa/object_serialiser.py,sha256=mzd2U_mFcAPalN2m9wxq35-BBeRJOhNK1k0-BmRSfQM,17055
|
|
8
|
+
aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
|
|
10
|
+
aiqa/tracing.py,sha256=XKYUwZUIkybxIkOKMj5xwVV2IwX6QTvRIzsZOv8jUOc,35771
|
|
11
|
+
aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
|
|
12
|
+
aiqa/types.py,sha256=Rv27oC1R0P1soJz5wsdwkVW-jfHQEVi4vUhwRJid270,2529
|
|
13
|
+
aiqa_client-0.7.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
|
|
14
|
+
aiqa_client-0.7.2.dist-info/METADATA,sha256=Q4Wwu_FqNSB7IRdydBcRFDcL2bHNLyStT6DYkc_aS8E,7705
|
|
15
|
+
aiqa_client-0.7.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
16
|
+
aiqa_client-0.7.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
|
|
17
|
+
aiqa_client-0.7.2.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
|
|
2
|
-
aiqa/client.py,sha256=pChJBbaPwd6flVaucEt1oZDoSSlLQWJpBLcqruLuM60,13296
|
|
3
|
-
aiqa/constants.py,sha256=Xq8425ozX9T4d_gmSYvWjEqPIdNlrasuymmImetC3rs,226
|
|
4
|
-
aiqa/experiment_runner.py,sha256=FVhAtvjV5_jAmPCq55Xl2TNwXV5YAIrv2OFaV3wbjbs,19426
|
|
5
|
-
aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
|
|
6
|
-
aiqa/llm_as_judge.py,sha256=tJlYX6qZaqhZEC-3wvSk7btb4SMk1O1avDtujj9lHj4,9980
|
|
7
|
-
aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
|
|
8
|
-
aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
|
|
10
|
-
aiqa/tracing.py,sha256=juRFgt-uR5Z726F3pOZcxgcnZWom-sZ_-fYrhPHbFP4,32159
|
|
11
|
-
aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
|
|
12
|
-
aiqa/types.py,sha256=E1-IPJNbH9A4TPUT0bXZDIT6SHwHQSolzOM4j9NXR5E,2531
|
|
13
|
-
aiqa_client-0.6.1.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
|
|
14
|
-
aiqa_client-0.6.1.dist-info/METADATA,sha256=n76Zntwmd9tIP3aXQKYuGaUJQkJr37GmF-Vn_EmToCg,7705
|
|
15
|
-
aiqa_client-0.6.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
16
|
-
aiqa_client-0.6.1.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
|
|
17
|
-
aiqa_client-0.6.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|