aiqtoolkit 1.2.0a20250630__py3-none-any.whl → 1.2.0a20250702__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. Click here for more details.
- aiq/eval/rag_evaluator/evaluate.py +30 -5
- aiq/eval/rag_evaluator/register.py +6 -2
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/METADATA +1 -1
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/RECORD +9 -9
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/WHEEL +0 -0
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/entry_points.txt +0 -0
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/top_level.txt +0 -0
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import logging
|
|
17
17
|
from collections.abc import Sequence
|
|
18
18
|
|
|
19
|
+
from pydantic import BaseModel
|
|
19
20
|
from ragas import EvaluationDataset
|
|
20
21
|
from ragas import SingleTurnSample
|
|
21
22
|
from ragas.dataset_schema import EvaluationResult
|
|
@@ -25,6 +26,7 @@ from tqdm import tqdm
|
|
|
25
26
|
|
|
26
27
|
from aiq.data_models.intermediate_step import IntermediateStepType
|
|
27
28
|
from aiq.eval.evaluator.evaluator_model import EvalInput
|
|
29
|
+
from aiq.eval.evaluator.evaluator_model import EvalInputItem
|
|
28
30
|
from aiq.eval.evaluator.evaluator_model import EvalOutput
|
|
29
31
|
from aiq.eval.evaluator.evaluator_model import EvalOutputItem
|
|
30
32
|
from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
|
|
@@ -34,13 +36,36 @@ logger = logging.getLogger(__name__)
|
|
|
34
36
|
|
|
35
37
|
class RAGEvaluator:
|
|
36
38
|
|
|
37
|
-
def __init__(self,
|
|
39
|
+
def __init__(self,
|
|
40
|
+
evaluator_llm: LangchainLLMWrapper,
|
|
41
|
+
metrics: Sequence[Metric],
|
|
42
|
+
max_concurrency=8,
|
|
43
|
+
input_obj_field: str | None = None):
|
|
38
44
|
self.evaluator_llm = evaluator_llm
|
|
39
45
|
self.metrics = metrics
|
|
40
46
|
self.max_concurrency = max_concurrency
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def
|
|
47
|
+
self.input_obj_field = input_obj_field
|
|
48
|
+
|
|
49
|
+
def extract_input_obj(self, item: EvalInputItem) -> str:
|
|
50
|
+
"""Extracts the input object from EvalInputItem based on the configured input_obj_field."""
|
|
51
|
+
input_obj = item.input_obj
|
|
52
|
+
if isinstance(input_obj, BaseModel):
|
|
53
|
+
if self.input_obj_field and hasattr(input_obj, self.input_obj_field):
|
|
54
|
+
# If input_obj_field is specified, return the value of that field
|
|
55
|
+
return str(getattr(input_obj, self.input_obj_field, ""))
|
|
56
|
+
else:
|
|
57
|
+
# If no input_obj_field is specified, return the string representation of the model
|
|
58
|
+
return input_obj.model_dump_json()
|
|
59
|
+
|
|
60
|
+
if isinstance(input_obj, dict):
|
|
61
|
+
# If input_obj is a dict, return the JSON string representation
|
|
62
|
+
if self.input_obj_field and self.input_obj_field in input_obj:
|
|
63
|
+
# If input_obj_field is specified, return the value of that field
|
|
64
|
+
return str(input_obj[self.input_obj_field])
|
|
65
|
+
|
|
66
|
+
return str(input_obj) # Fallback to string representation of the dict
|
|
67
|
+
|
|
68
|
+
def eval_input_to_ragas(self, eval_input: EvalInput) -> EvaluationDataset:
|
|
44
69
|
"""Converts EvalInput into a Ragas-compatible EvaluationDataset."""
|
|
45
70
|
from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
46
71
|
event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
|
|
@@ -49,7 +74,7 @@ class RAGEvaluator:
|
|
|
49
74
|
intermediate_step_adapter = IntermediateStepAdapter()
|
|
50
75
|
for item in eval_input.eval_input_items:
|
|
51
76
|
# Extract required fields from EvalInputItem
|
|
52
|
-
user_input = item
|
|
77
|
+
user_input = self.extract_input_obj(item) # Extract input object as string
|
|
53
78
|
reference = item.expected_output_obj # Reference correct answer
|
|
54
79
|
response = item.output_obj # Model's generated response
|
|
55
80
|
|
|
@@ -47,6 +47,8 @@ class RagasEvaluatorConfig(EvaluatorBaseConfig, name="ragas"):
|
|
|
47
47
|
# Ragas metric
|
|
48
48
|
metric: str | dict[str, RagasMetricConfig] = Field(default="AnswerAccuracy",
|
|
49
49
|
description="RAGAS metric callable with optional 'kwargs:'")
|
|
50
|
+
input_obj_field: str | None = Field(
|
|
51
|
+
default=None, description="The field in the input object that contains the content to evaluate.")
|
|
50
52
|
|
|
51
53
|
@model_validator(mode="before")
|
|
52
54
|
@classmethod
|
|
@@ -133,7 +135,9 @@ async def register_ragas_evaluator(config: RagasEvaluatorConfig, builder: EvalBu
|
|
|
133
135
|
metrics.append(metric_callable(**kwargs))
|
|
134
136
|
|
|
135
137
|
# Create the RAG evaluator
|
|
136
|
-
_evaluator = RAGEvaluator(evaluator_llm=llm,
|
|
137
|
-
|
|
138
|
+
_evaluator = RAGEvaluator(evaluator_llm=llm,
|
|
139
|
+
metrics=metrics,
|
|
140
|
+
max_concurrency=builder.get_max_concurrency(),
|
|
141
|
+
input_obj_field=config.input_obj_field) if metrics else None
|
|
138
142
|
|
|
139
143
|
yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
|
|
@@ -121,8 +121,8 @@ aiq/eval/evaluator/__init__.py,sha256=GUJrgGtpvyMUCjUBvR3faAdv-tZzbU9W-izgx9aMEQ
|
|
|
121
121
|
aiq/eval/evaluator/base_evaluator.py,sha256=5kqOcTYNecnh9us_XvV58pj5tZI82NGkVN4tg9-R_ZE,3040
|
|
122
122
|
aiq/eval/evaluator/evaluator_model.py,sha256=5cxe3mqznlNGzv29v_VseYU7OzoT1eTf7hgSPQxytsM,1440
|
|
123
123
|
aiq/eval/rag_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
|
-
aiq/eval/rag_evaluator/evaluate.py,sha256=
|
|
125
|
-
aiq/eval/rag_evaluator/register.py,sha256=
|
|
124
|
+
aiq/eval/rag_evaluator/evaluate.py,sha256=S767ouF5FUFze9jhUyqAPG2aOKzVS_VnAkP4ppE1aNg,7837
|
|
125
|
+
aiq/eval/rag_evaluator/register.py,sha256=vmUxgMJsI42scapLFLvFI6oqXgu9Rl_XhiNedy5-Cqw,5889
|
|
126
126
|
aiq/eval/swe_bench_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
127
|
aiq/eval/swe_bench_evaluator/evaluate.py,sha256=kNukRruq1EM1RsGLvpVuC22xcP0gpn9acF3edGak9vY,9858
|
|
128
128
|
aiq/eval/swe_bench_evaluator/register.py,sha256=sTb74F7w4iuI0ROsEJ4bV13Nt1GEWQn7UvO2O0HXwXk,1537
|
|
@@ -312,10 +312,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
|
|
|
312
312
|
aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
|
|
313
313
|
aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
314
314
|
aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
|
|
315
|
-
aiqtoolkit-1.2.
|
|
316
|
-
aiqtoolkit-1.2.
|
|
317
|
-
aiqtoolkit-1.2.
|
|
318
|
-
aiqtoolkit-1.2.
|
|
319
|
-
aiqtoolkit-1.2.
|
|
320
|
-
aiqtoolkit-1.2.
|
|
321
|
-
aiqtoolkit-1.2.
|
|
315
|
+
aiqtoolkit-1.2.0a20250702.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
|
|
316
|
+
aiqtoolkit-1.2.0a20250702.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
317
|
+
aiqtoolkit-1.2.0a20250702.dist-info/METADATA,sha256=SSw-cDM0WiHZSrOttYEBIRm__HZZT8zZTqiTQAywcVQ,20274
|
|
318
|
+
aiqtoolkit-1.2.0a20250702.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
319
|
+
aiqtoolkit-1.2.0a20250702.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
|
|
320
|
+
aiqtoolkit-1.2.0a20250702.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
|
|
321
|
+
aiqtoolkit-1.2.0a20250702.dist-info/RECORD,,
|
|
File without changes
|
{aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{aiqtoolkit-1.2.0a20250630.dist-info → aiqtoolkit-1.2.0a20250702.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|