aiqa-client 0.5.2__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiqa/__init__.py +8 -2
- aiqa/client.py +17 -2
- aiqa/constants.py +1 -1
- aiqa/experiment_runner.py +248 -77
- aiqa/llm_as_judge.py +281 -0
- aiqa/span_helpers.py +511 -0
- aiqa/tracing.py +169 -561
- aiqa/tracing_llm_utils.py +20 -9
- aiqa/types.py +61 -0
- {aiqa_client-0.5.2.dist-info → aiqa_client-0.6.1.dist-info}/METADATA +1 -1
- aiqa_client-0.6.1.dist-info/RECORD +17 -0
- {aiqa_client-0.5.2.dist-info → aiqa_client-0.6.1.dist-info}/WHEEL +1 -1
- aiqa_client-0.5.2.dist-info/RECORD +0 -14
- {aiqa_client-0.5.2.dist-info → aiqa_client-0.6.1.dist-info}/licenses/LICENSE.txt +0 -0
- {aiqa_client-0.5.2.dist-info → aiqa_client-0.6.1.dist-info}/top_level.txt +0 -0
aiqa/tracing_llm_utils.py
CHANGED
|
@@ -38,16 +38,18 @@ def _extract_and_set_token_usage(span: trace.Span, result: Any) -> None:
|
|
|
38
38
|
Extract OpenAI API style token usage from result and add to span attributes
|
|
39
39
|
using OpenTelemetry semantic conventions for gen_ai.
|
|
40
40
|
|
|
41
|
-
Looks for usage dict with prompt_tokens, completion_tokens, and total_tokens.
|
|
41
|
+
Looks for usage dict or object with prompt_tokens, completion_tokens, and total_tokens.
|
|
42
42
|
Sets gen_ai.usage.input_tokens, gen_ai.usage.output_tokens, and gen_ai.usage.total_tokens.
|
|
43
43
|
Only sets attributes that are not already set.
|
|
44
44
|
|
|
45
45
|
This function detects token usage from OpenAI API response patterns:
|
|
46
|
-
- OpenAI Chat Completions API: The 'usage' object contains 'prompt_tokens', 'completion_tokens', and 'total_tokens'.
|
|
46
|
+
- OpenAI Chat Completions API: The 'usage' object (dict or Usage object) contains 'prompt_tokens', 'completion_tokens', and 'total_tokens'.
|
|
47
47
|
See https://platform.openai.com/docs/api-reference/chat/object (usage field)
|
|
48
48
|
- OpenAI Completions API: The 'usage' object contains 'prompt_tokens', 'completion_tokens', and 'total_tokens'.
|
|
49
49
|
See https://platform.openai.com/docs/api-reference/completions/object (usage field)
|
|
50
50
|
|
|
51
|
+
Handles both dict and object cases (e.g., OpenAI SDK Usage objects).
|
|
52
|
+
|
|
51
53
|
This function is safe against exceptions and will not derail tracing or program execution.
|
|
52
54
|
"""
|
|
53
55
|
try:
|
|
@@ -74,15 +76,24 @@ def _extract_and_set_token_usage(span: trace.Span, result: Any) -> None:
|
|
|
74
76
|
# If accessing result properties fails, just return silently
|
|
75
77
|
return
|
|
76
78
|
|
|
77
|
-
# Extract token usage if found
|
|
78
|
-
if
|
|
79
|
+
# Extract token usage if found (handle both dict and object cases)
|
|
80
|
+
if usage is not None:
|
|
79
81
|
try:
|
|
80
82
|
# Support both OpenAI format (prompt_tokens/completion_tokens) and Bedrock format (input_tokens/output_tokens)
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
# Handle dict case
|
|
84
|
+
if isinstance(usage, dict):
|
|
85
|
+
prompt_tokens = usage.get("prompt_tokens") or usage.get("PromptTokens")
|
|
86
|
+
completion_tokens = usage.get("completion_tokens") or usage.get("CompletionTokens")
|
|
87
|
+
input_tokens = usage.get("input_tokens") or usage.get("InputTokens")
|
|
88
|
+
output_tokens = usage.get("output_tokens") or usage.get("OutputTokens")
|
|
89
|
+
total_tokens = usage.get("total_tokens") or usage.get("TotalTokens")
|
|
90
|
+
# Handle object case (e.g., OpenAI Usage object)
|
|
91
|
+
else:
|
|
92
|
+
prompt_tokens = getattr(usage, "prompt_tokens", None) or getattr(usage, "PromptTokens", None)
|
|
93
|
+
completion_tokens = getattr(usage, "completion_tokens", None) or getattr(usage, "CompletionTokens", None)
|
|
94
|
+
input_tokens = getattr(usage, "input_tokens", None) or getattr(usage, "InputTokens", None)
|
|
95
|
+
output_tokens = getattr(usage, "output_tokens", None) or getattr(usage, "OutputTokens", None)
|
|
96
|
+
total_tokens = getattr(usage, "total_tokens", None) or getattr(usage, "TotalTokens", None)
|
|
86
97
|
|
|
87
98
|
# Use Bedrock format if OpenAI format not available
|
|
88
99
|
if prompt_tokens is None:
|
aiqa/types.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
from typing import TypedDict, Dict, Optional, Awaitable, Callable, Any, Union, Annotated, List
|
|
4
|
+
from numbers import Number
|
|
5
|
+
|
|
6
|
+
class Metric(TypedDict):
|
|
7
|
+
"""Definition of a metric to score. See Metric.ts for more details."""
|
|
8
|
+
id: str
|
|
9
|
+
type: str
|
|
10
|
+
name: Optional[str] = None
|
|
11
|
+
description: Optional[str] = None
|
|
12
|
+
provider: Optional[str] = None
|
|
13
|
+
model: Optional[str] = None
|
|
14
|
+
prompt: Optional[str] = None
|
|
15
|
+
code: Optional[str] = None
|
|
16
|
+
|
|
17
|
+
class Example(TypedDict):
|
|
18
|
+
"""Definition of an example to score. See Example.ts for more details."""
|
|
19
|
+
id: str
|
|
20
|
+
input: Optional[str] = None
|
|
21
|
+
spans: Optional[List[Dict[str, Any]]] = None
|
|
22
|
+
metrics: Optional[List[Metric]] = None
|
|
23
|
+
|
|
24
|
+
class MetricResult(TypedDict):
|
|
25
|
+
"""Result of evaluating a metric on an output (i.e. a single metric for a single example)."""
|
|
26
|
+
score: Annotated[Number, "Numeric score for the metric evaluation, typically a float in the range [0, 1]"]
|
|
27
|
+
message: Optional[str] = None
|
|
28
|
+
error: Optional[str] = None
|
|
29
|
+
|
|
30
|
+
class Result(TypedDict):
|
|
31
|
+
"""Result of evaluating a set of metrics on an output (i.e. the full set of metrics for a single example)."""
|
|
32
|
+
exampleId: str
|
|
33
|
+
scores: Dict[str, Number]
|
|
34
|
+
messages: Optional[Dict[str, str]] = None
|
|
35
|
+
errors: Optional[Dict[str, str]] = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Function that processes input and parameters to produce an output (sync or async)
|
|
39
|
+
# Args:
|
|
40
|
+
# input: The input data for the example (typically a dict with the example's input fields)
|
|
41
|
+
# parameters: Dictionary of parameters to pass to the function (e.g., model settings, temperature)
|
|
42
|
+
# Returns:
|
|
43
|
+
# The output result, which can be any type. If the function is async, returns an Awaitable.
|
|
44
|
+
CallMyCodeType = Callable[[Any, Dict[str, Any]], Union[Any, Awaitable[Any]]]
|
|
45
|
+
|
|
46
|
+
# Function that calls an LLM with a system prompt and user message (async)
|
|
47
|
+
# Args:
|
|
48
|
+
# system_prompt: The system prompt/instructions for the LLM
|
|
49
|
+
# user_message: The "user" message containing the content to process (e.g., input and output to score)
|
|
50
|
+
# Returns:
|
|
51
|
+
# The raw response content string from the LLM (typically JSON for scoring)
|
|
52
|
+
CallLLMType = Callable[[str, str], Awaitable[str]]
|
|
53
|
+
|
|
54
|
+
# Function that scores a given output, using input, example, and parameters (usually async)
|
|
55
|
+
# Args:
|
|
56
|
+
# input: The input data for the example (typically a dict with the example's input fields)
|
|
57
|
+
# output: The output to score
|
|
58
|
+
# metric: The metric to score
|
|
59
|
+
# Returns:
|
|
60
|
+
# MetricResult object with score:[0,1], message (optional), and error (optional)
|
|
61
|
+
ScoreThisInputOutputMetricType = Callable[[Any, Any, Metric], Awaitable[MetricResult]]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
aiqa/__init__.py,sha256=JLQjgQgsyGQ1mRl4kcYygJq9i_91jN4WDem3dF1eMGA,1888
|
|
2
|
+
aiqa/client.py,sha256=pChJBbaPwd6flVaucEt1oZDoSSlLQWJpBLcqruLuM60,13296
|
|
3
|
+
aiqa/constants.py,sha256=Xq8425ozX9T4d_gmSYvWjEqPIdNlrasuymmImetC3rs,226
|
|
4
|
+
aiqa/experiment_runner.py,sha256=FVhAtvjV5_jAmPCq55Xl2TNwXV5YAIrv2OFaV3wbjbs,19426
|
|
5
|
+
aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
|
|
6
|
+
aiqa/llm_as_judge.py,sha256=tJlYX6qZaqhZEC-3wvSk7btb4SMk1O1avDtujj9lHj4,9980
|
|
7
|
+
aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
|
|
8
|
+
aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
aiqa/span_helpers.py,sha256=Ht4T_JJXK4HlqBY_Qwe8QDk9XwWCjagx_DkOUVY-PmY,18189
|
|
10
|
+
aiqa/tracing.py,sha256=juRFgt-uR5Z726F3pOZcxgcnZWom-sZ_-fYrhPHbFP4,32159
|
|
11
|
+
aiqa/tracing_llm_utils.py,sha256=zQSxzkEhPmgel1P2kFueNWTr846re-qHEFxD-_EHhNQ,10241
|
|
12
|
+
aiqa/types.py,sha256=E1-IPJNbH9A4TPUT0bXZDIT6SHwHQSolzOM4j9NXR5E,2531
|
|
13
|
+
aiqa_client-0.6.1.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
|
|
14
|
+
aiqa_client-0.6.1.dist-info/METADATA,sha256=n76Zntwmd9tIP3aXQKYuGaUJQkJr37GmF-Vn_EmToCg,7705
|
|
15
|
+
aiqa_client-0.6.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
16
|
+
aiqa_client-0.6.1.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
|
|
17
|
+
aiqa_client-0.6.1.dist-info/RECORD,,
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
aiqa/__init__.py,sha256=V1VLfpxU_BXfkvKyhGckZsPYF43NJhoXeVX6FkeHr9g,1721
|
|
2
|
-
aiqa/client.py,sha256=Ba3v-voBlTSCr-RU88INLXsF_5vqp42QiQWCFciSJbU,12542
|
|
3
|
-
aiqa/constants.py,sha256=tZuh7XvKs6hFvWc-YnQ5Na6uogJMsRrMy-rWOauvcIA,226
|
|
4
|
-
aiqa/experiment_runner.py,sha256=XAZsjVP70UH_QTk5ANSOQYAhmozuGXwKB5qWWHs-zeE,11186
|
|
5
|
-
aiqa/http_utils.py,sha256=OIB4tRI2TiDl4VKDmtbLWg9Q7TicMBeL7scLYEhVPXI,4944
|
|
6
|
-
aiqa/object_serialiser.py,sha256=DBv7EyXIwfwjwXHDsIwdZNFmQffRb5fKAE0r8qhoqgc,16958
|
|
7
|
-
aiqa/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
aiqa/tracing.py,sha256=gdmtpvBVbvc_HrJHgtr78_XH9sIWRjUoEkXuCuNmuc0,45662
|
|
9
|
-
aiqa/tracing_llm_utils.py,sha256=rNx6v6Wh_Mhv-_DPU9_aWS7YQcO46oiv0YPdBK1KVL8,9338
|
|
10
|
-
aiqa_client-0.5.2.dist-info/licenses/LICENSE.txt,sha256=kIzkzLuzG0HHaWYm4F4W5FeJ1Yxut3Ec6bhLWyw798A,1062
|
|
11
|
-
aiqa_client-0.5.2.dist-info/METADATA,sha256=xMaQSnI3AiNE6lYs2vM6BV9VxQWMHXyDoIl6JXwdi3I,7705
|
|
12
|
-
aiqa_client-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
aiqa_client-0.5.2.dist-info/top_level.txt,sha256=nwcsuVVSuWu27iLxZd4n1evVzv1W6FVTrSnCXCc-NQs,5
|
|
14
|
-
aiqa_client-0.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|