aiqtoolkit 1.2.0a20250525__py3-none-any.whl → 1.2.0a20250526__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. Click here for more details.
- aiq/eval/intermediate_step_adapter.py +11 -5
- aiq/eval/rag_evaluator/evaluate.py +7 -3
- aiq/eval/rag_evaluator/register.py +2 -1
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/METADATA +1 -1
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/RECORD +10 -10
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/WHEEL +0 -0
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/entry_points.txt +0 -0
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/top_level.txt +0 -0
|
@@ -79,15 +79,21 @@ class IntermediateStepAdapter:
|
|
|
79
79
|
for step in steps:
|
|
80
80
|
if step.event_type == IntermediateStepType.LLM_END:
|
|
81
81
|
last_llm_end_step = step
|
|
82
|
+
action = self.get_agent_action_single(step, "")
|
|
83
|
+
agent_actions.append(action)
|
|
82
84
|
else:
|
|
83
85
|
action = self.get_agent_action_single(step, last_llm_end_step)
|
|
84
86
|
agent_actions.append(action)
|
|
85
87
|
|
|
86
88
|
return agent_actions
|
|
87
89
|
|
|
88
|
-
def get_context(self, intermediate_steps: list[IntermediateStep]) -> list[str]:
|
|
90
|
+
def get_context(self, intermediate_steps: list[IntermediateStep],
|
|
91
|
+
event_filter: list[IntermediateStepType]) -> list[str]:
|
|
89
92
|
"""Grab the output of all the tools and return them as retrieved context."""
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
93
|
+
count = 0
|
|
94
|
+
agent_actions = []
|
|
95
|
+
for step in intermediate_steps:
|
|
96
|
+
if step.event_type in event_filter and step.data and step.data.output:
|
|
97
|
+
agent_actions.append(f"**Step {count}**\n{str(step.data.output)}")
|
|
98
|
+
count += 1
|
|
99
|
+
return agent_actions
|
|
@@ -23,6 +23,7 @@ from ragas.llms import LangchainLLMWrapper
|
|
|
23
23
|
from ragas.metrics import Metric
|
|
24
24
|
from tqdm import tqdm
|
|
25
25
|
|
|
26
|
+
from aiq.data_models.intermediate_step import IntermediateStepType
|
|
26
27
|
from aiq.eval.evaluator.evaluator_model import EvalInput
|
|
27
28
|
from aiq.eval.evaluator.evaluator_model import EvalOutput
|
|
28
29
|
from aiq.eval.evaluator.evaluator_model import EvalOutputItem
|
|
@@ -33,15 +34,16 @@ logger = logging.getLogger(__name__)
|
|
|
33
34
|
|
|
34
35
|
class RAGEvaluator:
|
|
35
36
|
|
|
36
|
-
def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric]):
|
|
37
|
+
def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric], max_concurrency=8):
|
|
37
38
|
self.evaluator_llm = evaluator_llm
|
|
38
39
|
self.metrics = metrics
|
|
40
|
+
self.max_concurrency = max_concurrency
|
|
39
41
|
|
|
40
42
|
@staticmethod
|
|
41
43
|
def eval_input_to_ragas(eval_input: EvalInput) -> EvaluationDataset:
|
|
42
44
|
"""Converts EvalInput into a Ragas-compatible EvaluationDataset."""
|
|
43
45
|
from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
44
|
-
|
|
46
|
+
event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
|
|
45
47
|
samples = []
|
|
46
48
|
|
|
47
49
|
intermediate_step_adapter = IntermediateStepAdapter()
|
|
@@ -55,7 +57,7 @@ class RAGEvaluator:
|
|
|
55
57
|
reference_contexts = [""] # Default to empty context
|
|
56
58
|
# implement context extraction from expected_trajectory
|
|
57
59
|
|
|
58
|
-
retrieved_contexts = intermediate_step_adapter.get_context(item.trajectory)
|
|
60
|
+
retrieved_contexts = intermediate_step_adapter.get_context(item.trajectory, event_filter)
|
|
59
61
|
# implement context extraction from expected_trajectory
|
|
60
62
|
|
|
61
63
|
# Create a SingleTurnSample
|
|
@@ -116,6 +118,7 @@ class RAGEvaluator:
|
|
|
116
118
|
async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
|
|
117
119
|
"""Run Ragas metrics evaluation on the provided EvalInput"""
|
|
118
120
|
from ragas import evaluate as ragas_evaluate
|
|
121
|
+
from ragas.run_config import RunConfig
|
|
119
122
|
|
|
120
123
|
ragas_dataset = self.eval_input_to_ragas(eval_input)
|
|
121
124
|
tqdm_position = TqdmPositionRegistry.claim()
|
|
@@ -126,6 +129,7 @@ class RAGEvaluator:
|
|
|
126
129
|
metrics=self.metrics,
|
|
127
130
|
show_progress=True,
|
|
128
131
|
llm=self.evaluator_llm,
|
|
132
|
+
run_config=RunConfig(max_workers=self.max_concurrency),
|
|
129
133
|
_pbar=pbar)
|
|
130
134
|
except Exception as e:
|
|
131
135
|
# On exception we still continue with other evaluators. Log and return an avg_score of 0.0
|
|
@@ -133,6 +133,7 @@ async def register_ragas_evaluator(config: RagasEvaluatorConfig, builder: EvalBu
|
|
|
133
133
|
metrics.append(metric_callable(**kwargs))
|
|
134
134
|
|
|
135
135
|
# Create the RAG evaluator
|
|
136
|
-
_evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics) if metrics else None
|
|
136
|
+
_evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics,
|
|
137
|
+
max_concurrency=builder.get_max_concurrency()) if metrics else None
|
|
137
138
|
|
|
138
139
|
yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
|
|
@@ -108,7 +108,7 @@ aiq/embedder/register.py,sha256=3MTZrfNQKp6AZTbfaA-PpTnyXiMyu-8HH9JnDCC0v9o,978
|
|
|
108
108
|
aiq/eval/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
|
|
109
109
|
aiq/eval/config.py,sha256=IlOr2o618kbkXP0G1F-AklZfsKYVos9UB4Dvlxf66xk,1431
|
|
110
110
|
aiq/eval/evaluate.py,sha256=WPGLBeJ46mwIlnprbtia1cm2MwMqZ-GskXoTn6R4oV0,14624
|
|
111
|
-
aiq/eval/intermediate_step_adapter.py,sha256=
|
|
111
|
+
aiq/eval/intermediate_step_adapter.py,sha256=4cSsGgFBvNjXnclk5FvZnQaFEdeulp7VEdRWKLcREAQ,4498
|
|
112
112
|
aiq/eval/register.py,sha256=QOHJqA2CQixeWMC9InyKbzXo1jByvrntD_m9-2Mvg9k,1076
|
|
113
113
|
aiq/eval/remote_workflow.py,sha256=Fb7Z6gdP2L_gqyWB--AEWfcXe9xPpQ_hPsf9lmqGXjI,5524
|
|
114
114
|
aiq/eval/runtime_event_subscriber.py,sha256=2VM8MqmPc_EWPxxrDDR9naiioZirkJUfGwzbXQqbdZA,1906
|
|
@@ -119,8 +119,8 @@ aiq/eval/dataset_handler/dataset_handler.py,sha256=cqdGVgHm6tsKk3TwFcFhptxAvcHVl
|
|
|
119
119
|
aiq/eval/evaluator/__init__.py,sha256=GUJrgGtpvyMUCjUBvR3faAdv-tZzbU9W-izgx9aMEQg,680
|
|
120
120
|
aiq/eval/evaluator/evaluator_model.py,sha256=alO8bVoGmvej1LpN5wZ5HG29TSrL4IMWdVcMew8IOzM,1405
|
|
121
121
|
aiq/eval/rag_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
122
|
-
aiq/eval/rag_evaluator/evaluate.py,sha256=
|
|
123
|
-
aiq/eval/rag_evaluator/register.py,sha256=
|
|
122
|
+
aiq/eval/rag_evaluator/evaluate.py,sha256=lEjXKiuELAHyWckz-bM91dZ6AT2J6NC7SfvtedR-Qdk,6548
|
|
123
|
+
aiq/eval/rag_evaluator/register.py,sha256=2NzxkgqyoZ4wC8ARj3tiVoE8ENCmplBCIKrNOFh6_VI,5642
|
|
124
124
|
aiq/eval/swe_bench_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
125
|
aiq/eval/swe_bench_evaluator/evaluate.py,sha256=kNukRruq1EM1RsGLvpVuC22xcP0gpn9acF3edGak9vY,9858
|
|
126
126
|
aiq/eval/swe_bench_evaluator/register.py,sha256=sTb74F7w4iuI0ROsEJ4bV13Nt1GEWQn7UvO2O0HXwXk,1537
|
|
@@ -307,10 +307,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
|
|
|
307
307
|
aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
|
|
308
308
|
aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
309
309
|
aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
|
|
310
|
-
aiqtoolkit-1.2.
|
|
311
|
-
aiqtoolkit-1.2.
|
|
312
|
-
aiqtoolkit-1.2.
|
|
313
|
-
aiqtoolkit-1.2.
|
|
314
|
-
aiqtoolkit-1.2.
|
|
315
|
-
aiqtoolkit-1.2.
|
|
316
|
-
aiqtoolkit-1.2.
|
|
310
|
+
aiqtoolkit-1.2.0a20250526.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
|
|
311
|
+
aiqtoolkit-1.2.0a20250526.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
312
|
+
aiqtoolkit-1.2.0a20250526.dist-info/METADATA,sha256=OERp4WxL1pg6wyXVwNFtsG7a6rbpdcpcuFgK8Bx56sg,20174
|
|
313
|
+
aiqtoolkit-1.2.0a20250526.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
314
|
+
aiqtoolkit-1.2.0a20250526.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
|
|
315
|
+
aiqtoolkit-1.2.0a20250526.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
|
|
316
|
+
aiqtoolkit-1.2.0a20250526.dist-info/RECORD,,
|
|
File without changes
|
{aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{aiqtoolkit-1.2.0a20250525.dist-info → aiqtoolkit-1.2.0a20250526.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|
|
File without changes
|