aiqtoolkit 1.2.0a20250630__py3-none-any.whl → 1.2.0a20250702__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

@@ -16,6 +16,7 @@
16
16
  import logging
17
17
  from collections.abc import Sequence
18
18
 
19
+ from pydantic import BaseModel
19
20
  from ragas import EvaluationDataset
20
21
  from ragas import SingleTurnSample
21
22
  from ragas.dataset_schema import EvaluationResult
@@ -25,6 +26,7 @@ from tqdm import tqdm
25
26
 
26
27
  from aiq.data_models.intermediate_step import IntermediateStepType
27
28
  from aiq.eval.evaluator.evaluator_model import EvalInput
29
+ from aiq.eval.evaluator.evaluator_model import EvalInputItem
28
30
  from aiq.eval.evaluator.evaluator_model import EvalOutput
29
31
  from aiq.eval.evaluator.evaluator_model import EvalOutputItem
30
32
  from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
@@ -34,13 +36,36 @@ logger = logging.getLogger(__name__)
34
36
 
35
37
  class RAGEvaluator:
36
38
 
37
- def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric], max_concurrency=8):
39
+ def __init__(self,
40
+ evaluator_llm: LangchainLLMWrapper,
41
+ metrics: Sequence[Metric],
42
+ max_concurrency=8,
43
+ input_obj_field: str | None = None):
38
44
  self.evaluator_llm = evaluator_llm
39
45
  self.metrics = metrics
40
46
  self.max_concurrency = max_concurrency
41
-
42
- @staticmethod
43
- def eval_input_to_ragas(eval_input: EvalInput) -> EvaluationDataset:
47
+ self.input_obj_field = input_obj_field
48
+
49
+ def extract_input_obj(self, item: EvalInputItem) -> str:
50
+ """Extracts the input object from EvalInputItem based on the configured input_obj_field."""
51
+ input_obj = item.input_obj
52
+ if isinstance(input_obj, BaseModel):
53
+ if self.input_obj_field and hasattr(input_obj, self.input_obj_field):
54
+ # If input_obj_field is specified, return the value of that field
55
+ return str(getattr(input_obj, self.input_obj_field, ""))
56
+ else:
57
+ # If no input_obj_field is specified, return the string representation of the model
58
+ return input_obj.model_dump_json()
59
+
60
+ if isinstance(input_obj, dict):
61
+ # If input_obj is a dict, return the JSON string representation
62
+ if self.input_obj_field and self.input_obj_field in input_obj:
63
+ # If input_obj_field is specified, return the value of that field
64
+ return str(input_obj[self.input_obj_field])
65
+
66
+ return str(input_obj) # Fallback to string representation of the dict
67
+
68
+ def eval_input_to_ragas(self, eval_input: EvalInput) -> EvaluationDataset:
44
69
  """Converts EvalInput into a Ragas-compatible EvaluationDataset."""
45
70
  from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
46
71
  event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
@@ -49,7 +74,7 @@ class RAGEvaluator:
49
74
  intermediate_step_adapter = IntermediateStepAdapter()
50
75
  for item in eval_input.eval_input_items:
51
76
  # Extract required fields from EvalInputItem
52
- user_input = item.input_obj # Assumes input_obj is a string (modify if needed)
77
+ user_input = self.extract_input_obj(item) # Extract input object as string
53
78
  reference = item.expected_output_obj # Reference correct answer
54
79
  response = item.output_obj # Model's generated response
55
80
 
@@ -47,6 +47,8 @@ class RagasEvaluatorConfig(EvaluatorBaseConfig, name="ragas"):
47
47
  # Ragas metric
48
48
  metric: str | dict[str, RagasMetricConfig] = Field(default="AnswerAccuracy",
49
49
  description="RAGAS metric callable with optional 'kwargs:'")
50
+ input_obj_field: str | None = Field(
51
+ default=None, description="The field in the input object that contains the content to evaluate.")
50
52
 
51
53
  @model_validator(mode="before")
52
54
  @classmethod
@@ -133,7 +135,9 @@ async def register_ragas_evaluator(config: RagasEvaluatorConfig, builder: EvalBu
133
135
  metrics.append(metric_callable(**kwargs))
134
136
 
135
137
  # Create the RAG evaluator
136
- _evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics,
137
- max_concurrency=builder.get_max_concurrency()) if metrics else None
138
+ _evaluator = RAGEvaluator(evaluator_llm=llm,
139
+ metrics=metrics,
140
+ max_concurrency=builder.get_max_concurrency(),
141
+ input_obj_field=config.input_obj_field) if metrics else None
138
142
 
139
143
  yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aiqtoolkit
3
- Version: 1.2.0a20250630
3
+ Version: 1.2.0a20250702
4
4
  Summary: NVIDIA Agent Intelligence toolkit
5
5
  Author: NVIDIA Corporation
6
6
  Maintainer: NVIDIA Corporation
@@ -121,8 +121,8 @@ aiq/eval/evaluator/__init__.py,sha256=GUJrgGtpvyMUCjUBvR3faAdv-tZzbU9W-izgx9aMEQ
121
121
  aiq/eval/evaluator/base_evaluator.py,sha256=5kqOcTYNecnh9us_XvV58pj5tZI82NGkVN4tg9-R_ZE,3040
122
122
  aiq/eval/evaluator/evaluator_model.py,sha256=5cxe3mqznlNGzv29v_VseYU7OzoT1eTf7hgSPQxytsM,1440
123
123
  aiq/eval/rag_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
- aiq/eval/rag_evaluator/evaluate.py,sha256=lEjXKiuELAHyWckz-bM91dZ6AT2J6NC7SfvtedR-Qdk,6548
125
- aiq/eval/rag_evaluator/register.py,sha256=2NzxkgqyoZ4wC8ARj3tiVoE8ENCmplBCIKrNOFh6_VI,5642
124
+ aiq/eval/rag_evaluator/evaluate.py,sha256=S767ouF5FUFze9jhUyqAPG2aOKzVS_VnAkP4ppE1aNg,7837
125
+ aiq/eval/rag_evaluator/register.py,sha256=vmUxgMJsI42scapLFLvFI6oqXgu9Rl_XhiNedy5-Cqw,5889
126
126
  aiq/eval/swe_bench_evaluator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
127
  aiq/eval/swe_bench_evaluator/evaluate.py,sha256=kNukRruq1EM1RsGLvpVuC22xcP0gpn9acF3edGak9vY,9858
128
128
  aiq/eval/swe_bench_evaluator/register.py,sha256=sTb74F7w4iuI0ROsEJ4bV13Nt1GEWQn7UvO2O0HXwXk,1537
@@ -312,10 +312,10 @@ aiq/utils/reactive/base/observer_base.py,sha256=UAlyAY_ky4q2t0P81RVFo2Bs_R7z5Nde
312
312
  aiq/utils/reactive/base/subject_base.py,sha256=Ed-AC6P7cT3qkW1EXjzbd5M9WpVoeN_9KCe3OM3FLU4,2521
313
313
  aiq/utils/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
314
314
  aiq/utils/settings/global_settings.py,sha256=U9TCLdoZsKq5qOVGjREipGVv9e-FlStzqy5zv82_VYk,7454
315
- aiqtoolkit-1.2.0a20250630.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
316
- aiqtoolkit-1.2.0a20250630.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
317
- aiqtoolkit-1.2.0a20250630.dist-info/METADATA,sha256=ZxHMhW-sl0vJbBmisJD6YTReUSVU_ZyIh1hgpOpdioE,20274
318
- aiqtoolkit-1.2.0a20250630.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
319
- aiqtoolkit-1.2.0a20250630.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
320
- aiqtoolkit-1.2.0a20250630.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
321
- aiqtoolkit-1.2.0a20250630.dist-info/RECORD,,
315
+ aiqtoolkit-1.2.0a20250702.dist-info/licenses/LICENSE-3rd-party.txt,sha256=8o7aySJa9CBvFshPcsRdJbczzdNyDGJ8b0J67WRUQ2k,183936
316
+ aiqtoolkit-1.2.0a20250702.dist-info/licenses/LICENSE.md,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
317
+ aiqtoolkit-1.2.0a20250702.dist-info/METADATA,sha256=SSw-cDM0WiHZSrOttYEBIRm__HZZT8zZTqiTQAywcVQ,20274
318
+ aiqtoolkit-1.2.0a20250702.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
319
+ aiqtoolkit-1.2.0a20250702.dist-info/entry_points.txt,sha256=gRlPfR5g21t328WNEQ4CcEz80S1sJNS8A7rMDYnzl4A,452
320
+ aiqtoolkit-1.2.0a20250702.dist-info/top_level.txt,sha256=fo7AzYcNhZ_tRWrhGumtxwnxMew4xrT1iwouDy_f0Kc,4
321
+ aiqtoolkit-1.2.0a20250702.dist-info/RECORD,,