aiqtoolkit 1.2.0a20250525__py3-none-any.whl → 1.2.0a20250527__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

@@ -79,15 +79,21 @@ class IntermediateStepAdapter:
79
79
  for step in steps:
80
80
  if step.event_type == IntermediateStepType.LLM_END:
81
81
  last_llm_end_step = step
82
+ action = self.get_agent_action_single(step, "")
83
+ agent_actions.append(action)
82
84
  else:
83
85
  action = self.get_agent_action_single(step, last_llm_end_step)
84
86
  agent_actions.append(action)
85
87
 
86
88
  return agent_actions
87
89
 
88
- def get_context(self, intermediate_steps: list[IntermediateStep]) -> list[str]:
90
+ def get_context(self, intermediate_steps: list[IntermediateStep],
91
+ event_filter: list[IntermediateStepType]) -> list[str]:
89
92
  """Grab the output of all the tools and return them as retrieved context."""
90
- return [
91
- str(step.data.output) for step in intermediate_steps
92
- if step.event_type == IntermediateStepType.TOOL_END and step.data and step.data.output
93
- ]
93
+ count = 0
94
+ agent_actions = []
95
+ for step in intermediate_steps:
96
+ if step.event_type in event_filter and step.data and step.data.output:
97
+ agent_actions.append(f"**Step {count}**\n{str(step.data.output)}")
98
+ count += 1
99
+ return agent_actions
@@ -23,6 +23,7 @@ from ragas.llms import LangchainLLMWrapper
23
23
  from ragas.metrics import Metric
24
24
  from tqdm import tqdm
25
25
 
26
+ from aiq.data_models.intermediate_step import IntermediateStepType
26
27
  from aiq.eval.evaluator.evaluator_model import EvalInput
27
28
  from aiq.eval.evaluator.evaluator_model import EvalOutput
28
29
  from aiq.eval.evaluator.evaluator_model import EvalOutputItem
@@ -33,15 +34,16 @@ logger = logging.getLogger(__name__)
33
34
 
34
35
  class RAGEvaluator:
35
36
 
36
- def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric]):
37
+ def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric], max_concurrency=8):
37
38
  self.evaluator_llm = evaluator_llm
38
39
  self.metrics = metrics
40
+ self.max_concurrency = max_concurrency
39
41
 
40
42
  @staticmethod
41
43
  def eval_input_to_ragas(eval_input: EvalInput) -> EvaluationDataset:
42
44
  """Converts EvalInput into a Ragas-compatible EvaluationDataset."""
43
45
  from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
44
-
46
+ event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
45
47
  samples = []
46
48
 
47
49
  intermediate_step_adapter = IntermediateStepAdapter()
@@ -55,7 +57,7 @@ class RAGEvaluator:
55
57
  reference_contexts = [""] # Default to empty context
56
58
  # implement context extraction from expected_trajectory
57
59
 
58
- retrieved_contexts = intermediate_step_adapter.get_context(item.trajectory)
60
+ retrieved_contexts = intermediate_step_adapter.get_context(item.trajectory, event_filter)
59
61
  # implement context extraction from expected_trajectory
60
62
 
61
63
  # Create a SingleTurnSample
@@ -116,6 +118,7 @@ class RAGEvaluator:
116
118
  async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
117
119
  """Run Ragas metrics evaluation on the provided EvalInput"""
118
120
  from ragas import evaluate as ragas_evaluate
121
+ from ragas.run_config import RunConfig
119
122
 
120
123
  ragas_dataset = self.eval_input_to_ragas(eval_input)
121
124
  tqdm_position = TqdmPositionRegistry.claim()
@@ -126,6 +129,7 @@ class RAGEvaluator:
126
129
  metrics=self.metrics,
127
130
  show_progress=True,
128
131
  llm=self.evaluator_llm,
132
+ run_config=RunConfig(max_workers=self.max_concurrency),
129
133
  _pbar=pbar)
130
134
  except Exception as e:
131
135
  # On exception we still continue with other evaluators. Log and return an avg_score of 0.0
@@ -133,6 +133,7 @@ async def register_ragas_evaluator(config: RagasEvaluatorConfig, builder: EvalBu
133
133
  metrics.append(metric_callable(**kwargs))
134
134
 
135
135
  # Create the RAG evaluator
136
- _evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics) if metrics else None
136
+ _evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics,
137
+ max_concurrency=builder.get_max_concurrency()) if metrics else None
137
138
 
138
139
  yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiqtoolkit
3
- Version: 1.2.0a20250525
3
+ Version: 1.2.0a20250527
4
4
  Summary: NVIDIA Agent Intelligence toolkit
5
5
  Author: NVIDIA Corporation
6
6
  Maintainer: NVIDIA Corporation
@@ -108,7 +108,7 @@ aiq/embedder/register.py,sha256=3MTZrfNQKp6AZTbfaA-PpTnyXiMyu-8HH9JnDCC0v9o,978
108
108
  aiq/eval/__init__.py,sha256=Xs1JQ16L9btwreh4pdGKwskffAw1YFO48jKrU4ib_7c,685
109
109
  aiq/eval/config.py,sha256=IlOr2o618kbkXP0G1F-AklZfsKYVos9UB4Dvlxf66xk,1431
110
110
  aiq/eval/evaluate.py,sha256=WPGLBeJ46mwIlnprbtia1cm2MwMqZ-GskXoTn6R4oV0,14624
111
- aiq/eval/intermediate_step_adapter.py,sha256=D645cfnncBEYc-LhYNwr0NwweCr1kTLENt1pLNvmVvU,4211
111
+ aiq/eval/intermediate_step_adapter.py,sha256=4cSsGgFBvNjXnclk5FvZnQaFEdeulp7VEdRWKLcREAQ,4498
112
112
  aiq/eval/register.py,sha256=QOHJqA2CQixeWMC9InyKbzXo1jByvrntD_m9-2Mvg9k,1076
113
113
  aiq/eval/remote_workflow.py,sha256=Fb7Z6gdP2L_gqyWB--AEWfcXe9xPpQ_hPsf9lmqGXjI,5524
114
114
  aiq/eval/runtime_event_subscriber.py,sha256=2VM8MqmPc_EWPxxrDDR9naiioZirkJUfGwzbXQqbdZA,1906
@@ -119,8 +119,8 @@ aiq/eval/dataset_handler/dataset_handler.py,sha256=cqdGVgHm6tsKk3TwFcFhptxAvcHVl
119
119
  aiq/eval/evaluator/__init__.py,sha256=GUJrgGtpvyMUCjUBvR3faAdv-tZzbU9W-izgx9aMEQg,680
120
120
  aiq/eval/evaluator/evaluator_model.py,sha256=alO8bVoGmvej1LpN5wZ5HG29TSrL4IMWdVcMew8IOzM,1405
121
121
  aiq/eval/rag_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
122
- aiq/eval/rag_evaluator/evaluate.py,sha256=sxOVHxSOSYvTWFV9jOTpGDrwf2PwkuI3iu_cFj5XbgU,6136
123
- aiq/eval/rag_evaluator/register.py,sha256=1SgbdMmMvk77Kh6BhP-XvWO2_8WEHEDtcHyoL8qwY_E,5565
122
+ aiq/eval/rag_evaluator/evaluate.py,sha256=lEjXKiuELAHyWckz-bM91dZ6AT2J6NC7SfvtedR-Qdk,6548
123
+ aiq/eval/rag_evaluator/register.py,sha256=2NzxkgqyoZ4wC8ARj3tiVoE8ENCmplBCIKrNOFh6_VI,5642
124
124
  aiq/eval/swe_bench_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
125
  aiq/eval/swe_bench_evaluator/evaluate.py,sha256=kNukRruq1EM1RsGLvpVuC22xcP0gpn9acF3edGak9vY,9858
126
126
  aiq/eval/swe_bench_evaluator/register.py,sha256=sTb74F7w4iuI0ROsEJ4bV13Nt1GEWQn7UvO2O0HXwXk,1537
@@ -307,10 +307,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
307
307
  aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
308
308
  aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
309
309
  aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
310
- aiqtoolkit-1.2.0a20250525.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
311
- aiqtoolkit-1.2.0a20250525.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
312
- aiqtoolkit-1.2.0a20250525.dist-info/METADATA,sha256=VtrrrJKL0ZyCGxF6LN-DCjzOMOUl1YcmDH0WtWGgzMc,20174
313
- aiqtoolkit-1.2.0a20250525.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
314
- aiqtoolkit-1.2.0a20250525.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
315
- aiqtoolkit-1.2.0a20250525.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
316
- aiqtoolkit-1.2.0a20250525.dist-info/RECORD,,
310
+ aiqtoolkit-1.2.0a20250527.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
311
+ aiqtoolkit-1.2.0a20250527.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
312
+ aiqtoolkit-1.2.0a20250527.dist-info/METADATA,sha256=hYPjikwtuznnhuOiEVVgsZ9gkR5-1PfHfPcx73ltznQ,20174
313
+ aiqtoolkit-1.2.0a20250527.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
314
+ aiqtoolkit-1.2.0a20250527.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
315
+ aiqtoolkit-1.2.0a20250527.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
316
+ aiqtoolkit-1.2.0a20250527.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5