aiqtoolkit 1.1.0a20250516__py3-none-any.whl → 1.1.0a20251020__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (319) hide show
  1. aiqtoolkit-1.1.0a20251020.dist-info/METADATA +37 -0
  2. aiqtoolkit-1.1.0a20251020.dist-info/RECORD +4 -0
  3. {aiqtoolkit-1.1.0a20250516.dist-info → aiqtoolkit-1.1.0a20251020.dist-info}/WHEEL +1 -1
  4. aiqtoolkit-1.1.0a20251020.dist-info/top_level.txt +1 -0
  5. aiq/agent/__init__.py +0 -0
  6. aiq/agent/base.py +0 -76
  7. aiq/agent/dual_node.py +0 -67
  8. aiq/agent/react_agent/__init__.py +0 -0
  9. aiq/agent/react_agent/agent.py +0 -322
  10. aiq/agent/react_agent/output_parser.py +0 -104
  11. aiq/agent/react_agent/prompt.py +0 -46
  12. aiq/agent/react_agent/register.py +0 -148
  13. aiq/agent/reasoning_agent/__init__.py +0 -0
  14. aiq/agent/reasoning_agent/reasoning_agent.py +0 -224
  15. aiq/agent/register.py +0 -23
  16. aiq/agent/rewoo_agent/__init__.py +0 -0
  17. aiq/agent/rewoo_agent/agent.py +0 -410
  18. aiq/agent/rewoo_agent/prompt.py +0 -108
  19. aiq/agent/rewoo_agent/register.py +0 -158
  20. aiq/agent/tool_calling_agent/__init__.py +0 -0
  21. aiq/agent/tool_calling_agent/agent.py +0 -123
  22. aiq/agent/tool_calling_agent/register.py +0 -105
  23. aiq/builder/__init__.py +0 -0
  24. aiq/builder/builder.py +0 -223
  25. aiq/builder/component_utils.py +0 -303
  26. aiq/builder/context.py +0 -227
  27. aiq/builder/embedder.py +0 -24
  28. aiq/builder/eval_builder.py +0 -120
  29. aiq/builder/evaluator.py +0 -29
  30. aiq/builder/framework_enum.py +0 -24
  31. aiq/builder/front_end.py +0 -73
  32. aiq/builder/function.py +0 -297
  33. aiq/builder/function_base.py +0 -376
  34. aiq/builder/function_info.py +0 -627
  35. aiq/builder/intermediate_step_manager.py +0 -176
  36. aiq/builder/llm.py +0 -25
  37. aiq/builder/retriever.py +0 -25
  38. aiq/builder/user_interaction_manager.py +0 -71
  39. aiq/builder/workflow.py +0 -143
  40. aiq/builder/workflow_builder.py +0 -757
  41. aiq/cli/__init__.py +0 -14
  42. aiq/cli/cli_utils/__init__.py +0 -0
  43. aiq/cli/cli_utils/config_override.py +0 -231
  44. aiq/cli/cli_utils/validation.py +0 -37
  45. aiq/cli/commands/__init__.py +0 -0
  46. aiq/cli/commands/configure/__init__.py +0 -0
  47. aiq/cli/commands/configure/channel/__init__.py +0 -0
  48. aiq/cli/commands/configure/channel/add.py +0 -28
  49. aiq/cli/commands/configure/channel/channel.py +0 -36
  50. aiq/cli/commands/configure/channel/remove.py +0 -30
  51. aiq/cli/commands/configure/channel/update.py +0 -30
  52. aiq/cli/commands/configure/configure.py +0 -33
  53. aiq/cli/commands/evaluate.py +0 -139
  54. aiq/cli/commands/info/__init__.py +0 -14
  55. aiq/cli/commands/info/info.py +0 -39
  56. aiq/cli/commands/info/list_channels.py +0 -32
  57. aiq/cli/commands/info/list_components.py +0 -129
  58. aiq/cli/commands/info/list_mcp.py +0 -126
  59. aiq/cli/commands/registry/__init__.py +0 -14
  60. aiq/cli/commands/registry/publish.py +0 -88
  61. aiq/cli/commands/registry/pull.py +0 -118
  62. aiq/cli/commands/registry/registry.py +0 -38
  63. aiq/cli/commands/registry/remove.py +0 -108
  64. aiq/cli/commands/registry/search.py +0 -155
  65. aiq/cli/commands/start.py +0 -250
  66. aiq/cli/commands/uninstall.py +0 -83
  67. aiq/cli/commands/validate.py +0 -47
  68. aiq/cli/commands/workflow/__init__.py +0 -14
  69. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  70. aiq/cli/commands/workflow/templates/config.yml.j2 +0 -16
  71. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +0 -22
  72. aiq/cli/commands/workflow/templates/register.py.j2 +0 -5
  73. aiq/cli/commands/workflow/templates/workflow.py.j2 +0 -36
  74. aiq/cli/commands/workflow/workflow.py +0 -37
  75. aiq/cli/commands/workflow/workflow_commands.py +0 -313
  76. aiq/cli/entrypoint.py +0 -133
  77. aiq/cli/main.py +0 -44
  78. aiq/cli/register_workflow.py +0 -408
  79. aiq/cli/type_registry.py +0 -879
  80. aiq/data_models/__init__.py +0 -14
  81. aiq/data_models/api_server.py +0 -588
  82. aiq/data_models/common.py +0 -143
  83. aiq/data_models/component.py +0 -46
  84. aiq/data_models/component_ref.py +0 -135
  85. aiq/data_models/config.py +0 -349
  86. aiq/data_models/dataset_handler.py +0 -122
  87. aiq/data_models/discovery_metadata.py +0 -286
  88. aiq/data_models/embedder.py +0 -26
  89. aiq/data_models/evaluate.py +0 -104
  90. aiq/data_models/evaluator.py +0 -26
  91. aiq/data_models/front_end.py +0 -26
  92. aiq/data_models/function.py +0 -30
  93. aiq/data_models/function_dependencies.py +0 -64
  94. aiq/data_models/interactive.py +0 -237
  95. aiq/data_models/intermediate_step.py +0 -269
  96. aiq/data_models/invocation_node.py +0 -38
  97. aiq/data_models/llm.py +0 -26
  98. aiq/data_models/logging.py +0 -26
  99. aiq/data_models/memory.py +0 -26
  100. aiq/data_models/profiler.py +0 -53
  101. aiq/data_models/registry_handler.py +0 -26
  102. aiq/data_models/retriever.py +0 -30
  103. aiq/data_models/step_adaptor.py +0 -64
  104. aiq/data_models/streaming.py +0 -33
  105. aiq/data_models/swe_bench_model.py +0 -54
  106. aiq/data_models/telemetry_exporter.py +0 -26
  107. aiq/embedder/__init__.py +0 -0
  108. aiq/embedder/langchain_client.py +0 -41
  109. aiq/embedder/nim_embedder.py +0 -58
  110. aiq/embedder/openai_embedder.py +0 -42
  111. aiq/embedder/register.py +0 -24
  112. aiq/eval/__init__.py +0 -14
  113. aiq/eval/config.py +0 -42
  114. aiq/eval/dataset_handler/__init__.py +0 -0
  115. aiq/eval/dataset_handler/dataset_downloader.py +0 -106
  116. aiq/eval/dataset_handler/dataset_filter.py +0 -52
  117. aiq/eval/dataset_handler/dataset_handler.py +0 -169
  118. aiq/eval/evaluate.py +0 -325
  119. aiq/eval/evaluator/__init__.py +0 -14
  120. aiq/eval/evaluator/evaluator_model.py +0 -44
  121. aiq/eval/intermediate_step_adapter.py +0 -93
  122. aiq/eval/rag_evaluator/__init__.py +0 -0
  123. aiq/eval/rag_evaluator/evaluate.py +0 -138
  124. aiq/eval/rag_evaluator/register.py +0 -138
  125. aiq/eval/register.py +0 -23
  126. aiq/eval/remote_workflow.py +0 -128
  127. aiq/eval/runtime_event_subscriber.py +0 -52
  128. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  129. aiq/eval/swe_bench_evaluator/evaluate.py +0 -215
  130. aiq/eval/swe_bench_evaluator/register.py +0 -36
  131. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  132. aiq/eval/trajectory_evaluator/evaluate.py +0 -118
  133. aiq/eval/trajectory_evaluator/register.py +0 -40
  134. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  135. aiq/eval/tunable_rag_evaluator/evaluate.py +0 -263
  136. aiq/eval/tunable_rag_evaluator/register.py +0 -50
  137. aiq/eval/utils/__init__.py +0 -0
  138. aiq/eval/utils/output_uploader.py +0 -131
  139. aiq/eval/utils/tqdm_position_registry.py +0 -40
  140. aiq/front_ends/__init__.py +0 -14
  141. aiq/front_ends/console/__init__.py +0 -14
  142. aiq/front_ends/console/console_front_end_config.py +0 -32
  143. aiq/front_ends/console/console_front_end_plugin.py +0 -107
  144. aiq/front_ends/console/register.py +0 -25
  145. aiq/front_ends/cron/__init__.py +0 -14
  146. aiq/front_ends/fastapi/__init__.py +0 -14
  147. aiq/front_ends/fastapi/fastapi_front_end_config.py +0 -150
  148. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +0 -103
  149. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +0 -607
  150. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +0 -80
  151. aiq/front_ends/fastapi/job_store.py +0 -161
  152. aiq/front_ends/fastapi/main.py +0 -70
  153. aiq/front_ends/fastapi/message_handler.py +0 -279
  154. aiq/front_ends/fastapi/message_validator.py +0 -345
  155. aiq/front_ends/fastapi/register.py +0 -25
  156. aiq/front_ends/fastapi/response_helpers.py +0 -195
  157. aiq/front_ends/fastapi/step_adaptor.py +0 -320
  158. aiq/front_ends/fastapi/websocket.py +0 -148
  159. aiq/front_ends/mcp/__init__.py +0 -14
  160. aiq/front_ends/mcp/mcp_front_end_config.py +0 -32
  161. aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
  162. aiq/front_ends/mcp/register.py +0 -27
  163. aiq/front_ends/mcp/tool_converter.py +0 -242
  164. aiq/front_ends/register.py +0 -22
  165. aiq/front_ends/simple_base/__init__.py +0 -14
  166. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +0 -52
  167. aiq/llm/__init__.py +0 -0
  168. aiq/llm/nim_llm.py +0 -45
  169. aiq/llm/openai_llm.py +0 -45
  170. aiq/llm/register.py +0 -22
  171. aiq/llm/utils/__init__.py +0 -14
  172. aiq/llm/utils/env_config_value.py +0 -94
  173. aiq/llm/utils/error.py +0 -17
  174. aiq/memory/__init__.py +0 -20
  175. aiq/memory/interfaces.py +0 -183
  176. aiq/memory/models.py +0 -112
  177. aiq/meta/module_to_distro.json +0 -3
  178. aiq/meta/pypi.md +0 -58
  179. aiq/observability/__init__.py +0 -0
  180. aiq/observability/async_otel_listener.py +0 -429
  181. aiq/observability/register.py +0 -99
  182. aiq/plugins/.namespace +0 -1
  183. aiq/profiler/__init__.py +0 -0
  184. aiq/profiler/callbacks/__init__.py +0 -0
  185. aiq/profiler/callbacks/agno_callback_handler.py +0 -295
  186. aiq/profiler/callbacks/base_callback_class.py +0 -20
  187. aiq/profiler/callbacks/langchain_callback_handler.py +0 -278
  188. aiq/profiler/callbacks/llama_index_callback_handler.py +0 -205
  189. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +0 -238
  190. aiq/profiler/callbacks/token_usage_base_model.py +0 -27
  191. aiq/profiler/data_frame_row.py +0 -51
  192. aiq/profiler/decorators/__init__.py +0 -0
  193. aiq/profiler/decorators/framework_wrapper.py +0 -131
  194. aiq/profiler/decorators/function_tracking.py +0 -254
  195. aiq/profiler/forecasting/__init__.py +0 -0
  196. aiq/profiler/forecasting/config.py +0 -18
  197. aiq/profiler/forecasting/model_trainer.py +0 -75
  198. aiq/profiler/forecasting/models/__init__.py +0 -22
  199. aiq/profiler/forecasting/models/forecasting_base_model.py +0 -40
  200. aiq/profiler/forecasting/models/linear_model.py +0 -196
  201. aiq/profiler/forecasting/models/random_forest_regressor.py +0 -268
  202. aiq/profiler/inference_metrics_model.py +0 -25
  203. aiq/profiler/inference_optimization/__init__.py +0 -0
  204. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  205. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +0 -452
  206. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +0 -258
  207. aiq/profiler/inference_optimization/data_models.py +0 -386
  208. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  209. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +0 -468
  210. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +0 -405
  211. aiq/profiler/inference_optimization/llm_metrics.py +0 -212
  212. aiq/profiler/inference_optimization/prompt_caching.py +0 -163
  213. aiq/profiler/inference_optimization/token_uniqueness.py +0 -107
  214. aiq/profiler/inference_optimization/workflow_runtimes.py +0 -72
  215. aiq/profiler/intermediate_property_adapter.py +0 -102
  216. aiq/profiler/profile_runner.py +0 -433
  217. aiq/profiler/utils.py +0 -184
  218. aiq/registry_handlers/__init__.py +0 -0
  219. aiq/registry_handlers/local/__init__.py +0 -0
  220. aiq/registry_handlers/local/local_handler.py +0 -176
  221. aiq/registry_handlers/local/register_local.py +0 -37
  222. aiq/registry_handlers/metadata_factory.py +0 -60
  223. aiq/registry_handlers/package_utils.py +0 -198
  224. aiq/registry_handlers/pypi/__init__.py +0 -0
  225. aiq/registry_handlers/pypi/pypi_handler.py +0 -251
  226. aiq/registry_handlers/pypi/register_pypi.py +0 -40
  227. aiq/registry_handlers/register.py +0 -21
  228. aiq/registry_handlers/registry_handler_base.py +0 -157
  229. aiq/registry_handlers/rest/__init__.py +0 -0
  230. aiq/registry_handlers/rest/register_rest.py +0 -56
  231. aiq/registry_handlers/rest/rest_handler.py +0 -237
  232. aiq/registry_handlers/schemas/__init__.py +0 -0
  233. aiq/registry_handlers/schemas/headers.py +0 -42
  234. aiq/registry_handlers/schemas/package.py +0 -68
  235. aiq/registry_handlers/schemas/publish.py +0 -63
  236. aiq/registry_handlers/schemas/pull.py +0 -82
  237. aiq/registry_handlers/schemas/remove.py +0 -36
  238. aiq/registry_handlers/schemas/search.py +0 -91
  239. aiq/registry_handlers/schemas/status.py +0 -47
  240. aiq/retriever/__init__.py +0 -0
  241. aiq/retriever/interface.py +0 -37
  242. aiq/retriever/milvus/__init__.py +0 -14
  243. aiq/retriever/milvus/register.py +0 -81
  244. aiq/retriever/milvus/retriever.py +0 -228
  245. aiq/retriever/models.py +0 -74
  246. aiq/retriever/nemo_retriever/__init__.py +0 -14
  247. aiq/retriever/nemo_retriever/register.py +0 -60
  248. aiq/retriever/nemo_retriever/retriever.py +0 -190
  249. aiq/retriever/register.py +0 -22
  250. aiq/runtime/__init__.py +0 -14
  251. aiq/runtime/loader.py +0 -188
  252. aiq/runtime/runner.py +0 -176
  253. aiq/runtime/session.py +0 -140
  254. aiq/runtime/user_metadata.py +0 -131
  255. aiq/settings/__init__.py +0 -0
  256. aiq/settings/global_settings.py +0 -318
  257. aiq/test/.namespace +0 -1
  258. aiq/tool/__init__.py +0 -0
  259. aiq/tool/code_execution/__init__.py +0 -0
  260. aiq/tool/code_execution/code_sandbox.py +0 -188
  261. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -60
  262. aiq/tool/code_execution/local_sandbox/__init__.py +0 -13
  263. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -83
  264. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -4
  265. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -25
  266. aiq/tool/code_execution/register.py +0 -70
  267. aiq/tool/code_execution/utils.py +0 -100
  268. aiq/tool/datetime_tools.py +0 -42
  269. aiq/tool/document_search.py +0 -141
  270. aiq/tool/github_tools/__init__.py +0 -0
  271. aiq/tool/github_tools/create_github_commit.py +0 -133
  272. aiq/tool/github_tools/create_github_issue.py +0 -87
  273. aiq/tool/github_tools/create_github_pr.py +0 -106
  274. aiq/tool/github_tools/get_github_file.py +0 -106
  275. aiq/tool/github_tools/get_github_issue.py +0 -166
  276. aiq/tool/github_tools/get_github_pr.py +0 -256
  277. aiq/tool/github_tools/update_github_issue.py +0 -100
  278. aiq/tool/mcp/__init__.py +0 -14
  279. aiq/tool/mcp/mcp_client.py +0 -220
  280. aiq/tool/mcp/mcp_tool.py +0 -95
  281. aiq/tool/memory_tools/__init__.py +0 -0
  282. aiq/tool/memory_tools/add_memory_tool.py +0 -79
  283. aiq/tool/memory_tools/delete_memory_tool.py +0 -67
  284. aiq/tool/memory_tools/get_memory_tool.py +0 -72
  285. aiq/tool/nvidia_rag.py +0 -95
  286. aiq/tool/register.py +0 -37
  287. aiq/tool/retriever.py +0 -89
  288. aiq/tool/server_tools.py +0 -63
  289. aiq/utils/__init__.py +0 -0
  290. aiq/utils/data_models/__init__.py +0 -0
  291. aiq/utils/data_models/schema_validator.py +0 -58
  292. aiq/utils/debugging_utils.py +0 -43
  293. aiq/utils/exception_handlers/__init__.py +0 -0
  294. aiq/utils/exception_handlers/schemas.py +0 -114
  295. aiq/utils/io/__init__.py +0 -0
  296. aiq/utils/io/yaml_tools.py +0 -119
  297. aiq/utils/metadata_utils.py +0 -74
  298. aiq/utils/optional_imports.py +0 -142
  299. aiq/utils/producer_consumer_queue.py +0 -178
  300. aiq/utils/reactive/__init__.py +0 -0
  301. aiq/utils/reactive/base/__init__.py +0 -0
  302. aiq/utils/reactive/base/observable_base.py +0 -65
  303. aiq/utils/reactive/base/observer_base.py +0 -55
  304. aiq/utils/reactive/base/subject_base.py +0 -79
  305. aiq/utils/reactive/observable.py +0 -59
  306. aiq/utils/reactive/observer.py +0 -76
  307. aiq/utils/reactive/subject.py +0 -131
  308. aiq/utils/reactive/subscription.py +0 -49
  309. aiq/utils/settings/__init__.py +0 -0
  310. aiq/utils/settings/global_settings.py +0 -197
  311. aiq/utils/type_converter.py +0 -232
  312. aiq/utils/type_utils.py +0 -397
  313. aiq/utils/url_utils.py +0 -27
  314. aiqtoolkit-1.1.0a20250516.dist-info/METADATA +0 -331
  315. aiqtoolkit-1.1.0a20250516.dist-info/RECORD +0 -316
  316. aiqtoolkit-1.1.0a20250516.dist-info/entry_points.txt +0 -17
  317. aiqtoolkit-1.1.0a20250516.dist-info/licenses/LICENSE-3rd-party.txt +0 -3686
  318. aiqtoolkit-1.1.0a20250516.dist-info/licenses/LICENSE.md +0 -201
  319. aiqtoolkit-1.1.0a20250516.dist-info/top_level.txt +0 -1
@@ -1,138 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import logging
17
- from collections.abc import Sequence
18
-
19
- from ragas import EvaluationDataset
20
- from ragas import SingleTurnSample
21
- from ragas.dataset_schema import EvaluationResult
22
- from ragas.llms import LangchainLLMWrapper
23
- from ragas.metrics import Metric
24
- from tqdm import tqdm
25
-
26
- from aiq.eval.evaluator.evaluator_model import EvalInput
27
- from aiq.eval.evaluator.evaluator_model import EvalOutput
28
- from aiq.eval.evaluator.evaluator_model import EvalOutputItem
29
- from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
-
34
class RAGEvaluator:
    """Evaluate workflow outputs with Ragas metrics and convert the results to an ``EvalOutput``."""

    def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric]):
        """Store the judge LLM wrapper and the Ragas metrics that ``evaluate`` will run."""
        self.evaluator_llm = evaluator_llm
        self.metrics = metrics

    @staticmethod
    def eval_input_to_ragas(eval_input: EvalInput) -> EvaluationDataset:
        """Converts EvalInput into a Ragas-compatible EvaluationDataset.

        Each ``EvalInputItem`` becomes one ``SingleTurnSample``; retrieved contexts are taken
        from the item's trajectory through ``IntermediateStepAdapter.get_context``.
        """
        from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

        intermediate_step_adapter = IntermediateStepAdapter()
        samples = []
        for item in eval_input.eval_input_items:
            samples.append(
                SingleTurnSample(
                    user_input=item.input_obj,  # Assumes input_obj is a string (modify if needed)
                    reference=item.expected_output_obj,  # Reference correct answer
                    response=item.output_obj,  # Model's generated response
                    # Context extraction from the expected trajectory is not implemented yet,
                    # so the reference contexts default to a single empty string.
                    reference_contexts=[""],
                    retrieved_contexts=intermediate_step_adapter.get_context(item.trajectory),
                ))

        return EvaluationDataset(samples=samples)

    def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: EvaluationResult | None) -> EvalOutput:
        """Converts the ragas EvaluationResult to aiq EvalOutput.

        Returns an ``EvalOutput`` with ``average_score=0.0`` and no items when there is no result,
        no scores, or no metric in the scores. Only the first metric is reported.
        """
        import math

        def _nan_to_zero(value):
            """Map ``None``/NaN scores to 0.0 so that one missing score cannot turn the average into NaN."""
            if value is None:
                return 0.0
            try:
                return 0.0 if math.isnan(value) else value
            except TypeError:
                # Not a real number; leave it untouched for the caller to deal with.
                return value

        if not results_dataset:
            logger.error("Ragas evaluation failed with no results")
            return EvalOutput(average_score=0.0, eval_output_items=[])

        scores: list[dict[str, float]] = results_dataset.scores
        if not scores:
            logger.error("Ragas returned empty score list")
            return EvalOutput(average_score=0.0, eval_output_items=[])

        # Convert from list of dicts to dict of lists
        scores_dict = {metric: [_nan_to_zero(score.get(metric)) for score in scores] for metric in scores[0]}
        if not scores_dict:
            logger.error("Ragas returned scores without any metric")
            return EvalOutput(average_score=0.0, eval_output_items=[])

        # Only the first (and only) metric is reported
        first_metric_name = next(iter(scores_dict))
        first_metric_values = scores_dict[first_metric_name]
        first_avg_score = sum(first_metric_values) / len(first_metric_values)

        df = results_dataset.to_pandas()
        # Use the ids from eval_input when there are enough of them to cover every row of df
        if len(eval_input.eval_input_items) >= len(df):
            ids = [item.id for item in eval_input.eval_input_items]
        else:
            ids = df["user_input"].tolist()  # Use "user_input" as ID fallback

        # Rows are read as dicts keyed by the real column names. itertuples() renames columns
        # that are not valid Python identifiers, which made the metric lookup fall back to 0.0.
        reasoning_keys = ["user_input", "reference", "response", "retrieved_contexts"]
        eval_output_items = [
            EvalOutputItem(id=ids[i],
                           score=_nan_to_zero(record.get(first_metric_name, 0.0)),
                           reasoning={key: record.get(key)
                                      for key in reasoning_keys})
            for i, record in enumerate(df.to_dict(orient="records"))
        ]
        return EvalOutput(average_score=first_avg_score, eval_output_items=eval_output_items)

    async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
        """Run Ragas metrics evaluation on the provided EvalInput.

        Any exception raised by Ragas is logged and converted into an empty result so that
        other evaluators can still run.
        """
        from ragas import evaluate as ragas_evaluate

        ragas_dataset = self.eval_input_to_ragas(eval_input)
        tqdm_position = TqdmPositionRegistry.claim()
        first_metric_name = self.metrics[0].name
        pbar = tqdm(total=len(ragas_dataset), desc=f"Evaluating Ragas {first_metric_name}", position=tqdm_position)
        try:
            results_dataset = ragas_evaluate(dataset=ragas_dataset,
                                             metrics=self.metrics,
                                             show_progress=True,
                                             llm=self.evaluator_llm,
                                             _pbar=pbar)
        except Exception as e:
            # On exception we still continue with other evaluators. Log and return an avg_score of 0.0
            # (logger.exception already attaches the traceback).
            logger.exception("Error evaluating ragas metric, Error: %s", e)
            results_dataset = None
        finally:
            pbar.close()
            TqdmPositionRegistry.release(tqdm_position)

        return self.ragas_to_eval_output(eval_input, results_dataset)
@@ -1,138 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import logging
17
-
18
- from pydantic import BaseModel
19
- from pydantic import Field
20
- from pydantic import model_validator
21
-
22
- from aiq.builder.builder import EvalBuilder
23
- from aiq.builder.evaluator import EvaluatorInfo
24
- from aiq.builder.framework_enum import LLMFrameworkEnum
25
- from aiq.cli.register_workflow import register_evaluator
26
- from aiq.data_models.evaluator import EvaluatorBaseConfig
27
- from aiq.eval.evaluator.evaluator_model import EvalInput
28
- from aiq.eval.evaluator.evaluator_model import EvalOutput
29
-
30
- logger = logging.getLogger(__name__)
31
-
32
-
33
class RagasMetricConfig(BaseModel):
    """Per-metric settings for a RAGAS metric.

    skip: when True the metric stays in the configuration but is not used.
    kwargs: additional keyword arguments passed to the metric's callable.
    """
    # Keep the metric configured without running it.
    skip: bool = False
    # Keyword arguments specific to the metric's callable; None means no extra arguments.
    kwargs: dict | None = None
41
-
42
-
43
class RagasEvaluatorConfig(EvaluatorBaseConfig, name="ragas"):
    """Evaluation using RAGAS metrics."""

    llm_name: str = Field(description="LLM as a judge.")
    # Ragas metric: either a bare metric name or a single-item mapping {metric name: metric config}
    metric: str | dict[str, RagasMetricConfig] = Field(default="AnswerAccuracy",
                                                       description="RAGAS metric callable with optional 'kwargs:'")

    @model_validator(mode="before")
    @classmethod
    def validate_metric(cls, values):
        """Ensures metric is either a string or a single-item dictionary.

        Raises:
            ValueError: if ``metric`` is given but is neither a string nor a single-item
                dictionary whose value is a mapping or a ``RagasMetricConfig``.
        """
        # A "before" validator sees the raw input. When `metric` is omitted the field default
        # must apply, so there is nothing to check (previously this raised and made the
        # default unusable). Non-dict input is left to pydantic's own validation.
        if not isinstance(values, dict) or "metric" not in values:
            return values

        metric = values["metric"]

        if isinstance(metric, dict):
            if len(metric) != 1:
                raise ValueError("Only one metric is allowed in the configuration.")
            _, value = next(iter(metric.items()))
            # Accept both a raw mapping and an already constructed RagasMetricConfig.
            if not isinstance(value, (dict, RagasMetricConfig)):
                raise ValueError("Metric value must be a RagasMetricConfig object.")
        elif not isinstance(metric, str):
            raise ValueError("Metric must be either a string or a single-item dictionary.")

        return values

    @property
    def metric_name(self) -> str:
        """Returns the single metric name, or an empty string if no metric is set."""
        if isinstance(self.metric, str):
            return self.metric
        if isinstance(self.metric, dict) and self.metric:
            return next(iter(self.metric.keys()))  # pylint: disable=no-member
        return ""

    @property
    def metric_config(self) -> RagasMetricConfig:
        """Returns the metric configuration (or a default if only a string is provided)."""
        if isinstance(self.metric, str):
            return RagasMetricConfig()  # Default config when only a metric name is given
        if isinstance(self.metric, dict) and self.metric:
            return next(iter(self.metric.values()))  # pylint: disable=no-member
        return RagasMetricConfig()  # Default config when an invalid type is provided
85
-
86
-
87
@register_evaluator(config_type=RagasEvaluatorConfig)
async def register_ragas_evaluator(config: RagasEvaluatorConfig, builder: EvalBuilder):
    """Build the RAGAS evaluator described by ``config`` and yield its ``EvaluatorInfo``.

    The metric is looked up by name in ``ragas.metrics``. If the metric is skipped or cannot
    be found, no evaluator is created and the yielded ``evaluate_fn`` returns an empty result.
    """
    from ragas.metrics import Metric

    def get_ragas_metric(metric_name: str) -> Metric | None:
        """
        Fetch callable for RAGAS metrics
        """
        try:
            import ragas.metrics as ragas_metrics

            metric = getattr(ragas_metrics, metric_name)
        except ImportError as e:
            message = f"Ragas metrics not found {e}."
            logger.error(message)
            raise ValueError(message) from e
        except AttributeError as e:
            message = f"Ragas metric {metric_name} not found {e}."
            logger.error(message)
            metric = None
        return metric

    async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
        '''Run the RAGAS evaluation and return the average scores and evaluation results dataframe'''
        if rag_evaluator:
            return await rag_evaluator.evaluate(eval_input)

        # No evaluator could be built: report it and hand back empty results
        logger.warning("No evaluator found for RAGAS metrics.")
        return EvalOutput(average_score=0.0, eval_output_items=[])

    from .evaluate import RAGEvaluator

    # The judge LLM, wrapped for LangChain
    llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)

    # Only a single metric is supported at the moment; collect its instantiated callable
    selected_name = config.metric_name
    selected_config = config.metric_config
    metrics = []
    if not selected_config.skip:
        metric_callable = get_ragas_metric(selected_name)
        if metric_callable:
            extra_kwargs = selected_config.kwargs or {}
            metrics.append(metric_callable(**extra_kwargs))

    # The evaluator exists only when a metric was resolved; evaluate_fn reads it lazily
    if metrics:
        rag_evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics)
    else:
        rag_evaluator = None

    yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")
aiq/eval/register.py DELETED
@@ -1,23 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- # flake8: noqa
17
- # pylint: disable=unused-import
18
-
19
- # Import evaluators which need to be automatically registered here
20
- from .rag_evaluator.register import register_ragas_evaluator
21
- from .swe_bench_evaluator.register import register_swe_bench_evaluator
22
- from .trajectory_evaluator.register import register_trajectory_evaluator
23
- from .tunable_rag_evaluator.register import register_tunable_rag_evaluator
@@ -1,128 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import asyncio
17
- import json
18
- import logging
19
-
20
- import aiohttp
21
- from pydantic import ValidationError
22
- from tqdm import tqdm
23
-
24
- from aiq.data_models.api_server import AIQResponseIntermediateStep
25
- from aiq.data_models.intermediate_step import IntermediateStep
26
- from aiq.data_models.intermediate_step import IntermediateStepPayload
27
- from aiq.eval.config import EvaluationRunConfig
28
- from aiq.eval.evaluator.evaluator_model import EvalInput
29
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
30
-
31
- logger = logging.getLogger(__name__)
32
-
33
- # Constants for streaming response prefixes
34
- DATA_PREFIX = "data: "
35
- INTERMEDIATE_DATA_PREFIX = "intermediate_data: "
36
-
37
-
38
class EvaluationRemoteWorkflowHandler:
    """Run evaluation inputs against a workflow served on a remote endpoint."""

    def __init__(self, config: EvaluationRunConfig, max_concurrency: int):
        """Keep the run configuration and create the semaphore that caps concurrent requests."""
        self.config = config
        # Run metadata
        self.semaphore = asyncio.Semaphore(max_concurrency)

    async def run_workflow_remote_single(self, session: aiohttp.ClientSession, item: EvalInputItem):
        """
        Sends a single input to the endpoint hosting the workflow and retrieves the response.

        The item is updated in place: ``output_obj`` receives the last non-empty "value" streamed
        by the endpoint and ``trajectory`` the parsed intermediate steps. If the request fails or
        times out, ``output_obj`` is set to None and ``trajectory`` to an empty list.
        """
        question = item.input_obj
        # generate request format
        payload = {"input_message": question}

        try:
            # Use the streaming endpoint
            endpoint = f"{self.config.endpoint}/generate/full"
            async with session.post(endpoint, json=payload) as response:
                response.raise_for_status()  # Raise an exception for HTTP errors

                # Initialize variables to store the response
                final_response = None
                intermediate_steps = []

                # Process the streaming response
                async for raw_line in response.content:
                    line = raw_line.decode('utf-8').strip()
                    if not line:
                        continue

                    if line.startswith(DATA_PREFIX):
                        # This is a generate response chunk
                        try:
                            chunk_data = json.loads(line[len(DATA_PREFIX):])
                            if chunk_data.get("value"):
                                final_response = chunk_data.get("value")
                        except json.JSONDecodeError as e:
                            logger.error("Failed to parse generate response chunk: %s", e)
                            continue
                    elif line.startswith(INTERMEDIATE_DATA_PREFIX):
                        # This is an intermediate step
                        try:
                            step_data = json.loads(line[len(INTERMEDIATE_DATA_PREFIX):])
                            response_intermediate = AIQResponseIntermediateStep.model_validate(step_data)
                            # The payload is expected to be IntermediateStepPayload
                            intermediate_step = IntermediateStep(
                                payload=IntermediateStepPayload.model_validate_json(response_intermediate.payload))
                            intermediate_steps.append(intermediate_step)
                        except (json.JSONDecodeError, ValidationError) as e:
                            logger.error("Failed to parse intermediate step: %s", e)
                            continue

        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            # Handle connection, HTTP and timeout errors. The session's total timeout raises
            # asyncio.TimeoutError, which is not an aiohttp.ClientError; left uncaught it would
            # propagate through asyncio.gather and abort the whole evaluation run.
            logger.error("Request failed for question %s: %s", question, e)
            item.output_obj = None
            item.trajectory = []
            return

        # Extract and fill the item with the response and intermediate steps
        item.output_obj = final_response
        item.trajectory = intermediate_steps
        return

    async def run_workflow_remote_with_limits(self, session: aiohttp.ClientSession, item: EvalInputItem, pbar: tqdm):
        """
        Sends limited number of concurrent requests to a remote workflow and retrieves responses.

        The semaphore is held for the duration of one request; the progress bar advances by one
        once the request has finished.
        """
        async with self.semaphore:
            await self.run_workflow_remote_single(session=session, item=item)
            pbar.update(1)

    async def run_workflow_remote(self, eval_input: EvalInput) -> EvalInput:
        """
        Sends inputs to a workflow hosted on a remote endpoint.

        Returns the same ``eval_input`` object with every item filled in place.
        """
        timeout = aiohttp.ClientTimeout(total=self.config.endpoint_timeout)
        # Create the bar before entering `try` so that `finally` never sees an unbound name.
        pbar = tqdm(total=len(eval_input.eval_input_items), desc="Running workflow", unit="item")
        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                # get the questions from the eval_input
                tasks = [
                    self.run_workflow_remote_with_limits(session, item, pbar) for item in eval_input.eval_input_items
                ]
                await asyncio.gather(*tasks)
        finally:
            pbar.close()

        return eval_input
@@ -1,52 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import asyncio
17
- import logging
18
-
19
- from aiq.builder.context import AIQContext
20
- from aiq.data_models.intermediate_step import IntermediateStep
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
-
25
def pull_intermediate() -> asyncio.Future[list[dict]]:
    """
    Subscribe to the current context's intermediate-step stream and expose the
    collected steps through a future.

    Each step received is dumped to a dict via ``model_dump()`` and appended to
    a list. When the stream completes, the returned future is resolved with
    that list; when the stream reports an error, the future is failed with the
    reported exception. A future that is already done is left untouched.

    Returns:
        A future that resolves to the list of dumped intermediate steps.
    """
    result_future = asyncio.Future()
    dumped_steps: list[dict] = []  # Filled incrementally by the on_next callback.
    aiq_context = AIQContext.get()

    def _collect_step(item: IntermediateStep):
        # Store the serialised form of every step as it arrives.
        dumped_steps.append(item.model_dump())

    def _fail(exc: Exception):
        logger.error("Hit on_error: %s", exc)
        if result_future.done():
            return
        result_future.set_exception(exc)

    def _finish():
        logger.debug("Completed reading intermediate steps")
        if result_future.done():
            return
        result_future.set_result(dumped_steps)

    # Wire the three callbacks into the step manager of the current context.
    step_manager = aiq_context.intermediate_step_manager
    step_manager.subscribe(on_next=_collect_step, on_error=_fail, on_complete=_finish)

    return result_future
File without changes