aiqtoolkit 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (316) hide show
  1. aiq/agent/__init__.py +0 -0
  2. aiq/agent/base.py +76 -0
  3. aiq/agent/dual_node.py +67 -0
  4. aiq/agent/react_agent/__init__.py +0 -0
  5. aiq/agent/react_agent/agent.py +322 -0
  6. aiq/agent/react_agent/output_parser.py +104 -0
  7. aiq/agent/react_agent/prompt.py +46 -0
  8. aiq/agent/react_agent/register.py +148 -0
  9. aiq/agent/reasoning_agent/__init__.py +0 -0
  10. aiq/agent/reasoning_agent/reasoning_agent.py +224 -0
  11. aiq/agent/register.py +23 -0
  12. aiq/agent/rewoo_agent/__init__.py +0 -0
  13. aiq/agent/rewoo_agent/agent.py +410 -0
  14. aiq/agent/rewoo_agent/prompt.py +108 -0
  15. aiq/agent/rewoo_agent/register.py +158 -0
  16. aiq/agent/tool_calling_agent/__init__.py +0 -0
  17. aiq/agent/tool_calling_agent/agent.py +123 -0
  18. aiq/agent/tool_calling_agent/register.py +105 -0
  19. aiq/builder/__init__.py +0 -0
  20. aiq/builder/builder.py +223 -0
  21. aiq/builder/component_utils.py +303 -0
  22. aiq/builder/context.py +227 -0
  23. aiq/builder/embedder.py +24 -0
  24. aiq/builder/eval_builder.py +120 -0
  25. aiq/builder/evaluator.py +29 -0
  26. aiq/builder/framework_enum.py +24 -0
  27. aiq/builder/front_end.py +73 -0
  28. aiq/builder/function.py +297 -0
  29. aiq/builder/function_base.py +376 -0
  30. aiq/builder/function_info.py +627 -0
  31. aiq/builder/intermediate_step_manager.py +176 -0
  32. aiq/builder/llm.py +25 -0
  33. aiq/builder/retriever.py +25 -0
  34. aiq/builder/user_interaction_manager.py +71 -0
  35. aiq/builder/workflow.py +143 -0
  36. aiq/builder/workflow_builder.py +757 -0
  37. aiq/cli/__init__.py +14 -0
  38. aiq/cli/cli_utils/__init__.py +0 -0
  39. aiq/cli/cli_utils/config_override.py +231 -0
  40. aiq/cli/cli_utils/validation.py +37 -0
  41. aiq/cli/commands/__init__.py +0 -0
  42. aiq/cli/commands/configure/__init__.py +0 -0
  43. aiq/cli/commands/configure/channel/__init__.py +0 -0
  44. aiq/cli/commands/configure/channel/add.py +28 -0
  45. aiq/cli/commands/configure/channel/channel.py +36 -0
  46. aiq/cli/commands/configure/channel/remove.py +30 -0
  47. aiq/cli/commands/configure/channel/update.py +30 -0
  48. aiq/cli/commands/configure/configure.py +33 -0
  49. aiq/cli/commands/evaluate.py +139 -0
  50. aiq/cli/commands/info/__init__.py +14 -0
  51. aiq/cli/commands/info/info.py +39 -0
  52. aiq/cli/commands/info/list_channels.py +32 -0
  53. aiq/cli/commands/info/list_components.py +129 -0
  54. aiq/cli/commands/info/list_mcp.py +126 -0
  55. aiq/cli/commands/registry/__init__.py +14 -0
  56. aiq/cli/commands/registry/publish.py +88 -0
  57. aiq/cli/commands/registry/pull.py +118 -0
  58. aiq/cli/commands/registry/registry.py +38 -0
  59. aiq/cli/commands/registry/remove.py +108 -0
  60. aiq/cli/commands/registry/search.py +155 -0
  61. aiq/cli/commands/start.py +250 -0
  62. aiq/cli/commands/uninstall.py +83 -0
  63. aiq/cli/commands/validate.py +47 -0
  64. aiq/cli/commands/workflow/__init__.py +14 -0
  65. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  66. aiq/cli/commands/workflow/templates/config.yml.j2 +16 -0
  67. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
  68. aiq/cli/commands/workflow/templates/register.py.j2 +5 -0
  69. aiq/cli/commands/workflow/templates/workflow.py.j2 +36 -0
  70. aiq/cli/commands/workflow/workflow.py +37 -0
  71. aiq/cli/commands/workflow/workflow_commands.py +313 -0
  72. aiq/cli/entrypoint.py +133 -0
  73. aiq/cli/main.py +44 -0
  74. aiq/cli/register_workflow.py +408 -0
  75. aiq/cli/type_registry.py +879 -0
  76. aiq/data_models/__init__.py +14 -0
  77. aiq/data_models/api_server.py +588 -0
  78. aiq/data_models/common.py +143 -0
  79. aiq/data_models/component.py +46 -0
  80. aiq/data_models/component_ref.py +135 -0
  81. aiq/data_models/config.py +349 -0
  82. aiq/data_models/dataset_handler.py +122 -0
  83. aiq/data_models/discovery_metadata.py +286 -0
  84. aiq/data_models/embedder.py +26 -0
  85. aiq/data_models/evaluate.py +104 -0
  86. aiq/data_models/evaluator.py +26 -0
  87. aiq/data_models/front_end.py +26 -0
  88. aiq/data_models/function.py +30 -0
  89. aiq/data_models/function_dependencies.py +64 -0
  90. aiq/data_models/interactive.py +237 -0
  91. aiq/data_models/intermediate_step.py +269 -0
  92. aiq/data_models/invocation_node.py +38 -0
  93. aiq/data_models/llm.py +26 -0
  94. aiq/data_models/logging.py +26 -0
  95. aiq/data_models/memory.py +26 -0
  96. aiq/data_models/profiler.py +53 -0
  97. aiq/data_models/registry_handler.py +26 -0
  98. aiq/data_models/retriever.py +30 -0
  99. aiq/data_models/step_adaptor.py +64 -0
  100. aiq/data_models/streaming.py +33 -0
  101. aiq/data_models/swe_bench_model.py +54 -0
  102. aiq/data_models/telemetry_exporter.py +26 -0
  103. aiq/embedder/__init__.py +0 -0
  104. aiq/embedder/langchain_client.py +41 -0
  105. aiq/embedder/nim_embedder.py +58 -0
  106. aiq/embedder/openai_embedder.py +42 -0
  107. aiq/embedder/register.py +24 -0
  108. aiq/eval/__init__.py +14 -0
  109. aiq/eval/config.py +42 -0
  110. aiq/eval/dataset_handler/__init__.py +0 -0
  111. aiq/eval/dataset_handler/dataset_downloader.py +106 -0
  112. aiq/eval/dataset_handler/dataset_filter.py +52 -0
  113. aiq/eval/dataset_handler/dataset_handler.py +169 -0
  114. aiq/eval/evaluate.py +325 -0
  115. aiq/eval/evaluator/__init__.py +14 -0
  116. aiq/eval/evaluator/evaluator_model.py +44 -0
  117. aiq/eval/intermediate_step_adapter.py +93 -0
  118. aiq/eval/rag_evaluator/__init__.py +0 -0
  119. aiq/eval/rag_evaluator/evaluate.py +138 -0
  120. aiq/eval/rag_evaluator/register.py +138 -0
  121. aiq/eval/register.py +23 -0
  122. aiq/eval/remote_workflow.py +128 -0
  123. aiq/eval/runtime_event_subscriber.py +52 -0
  124. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  125. aiq/eval/swe_bench_evaluator/evaluate.py +215 -0
  126. aiq/eval/swe_bench_evaluator/register.py +36 -0
  127. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  128. aiq/eval/trajectory_evaluator/evaluate.py +118 -0
  129. aiq/eval/trajectory_evaluator/register.py +40 -0
  130. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  131. aiq/eval/tunable_rag_evaluator/evaluate.py +263 -0
  132. aiq/eval/tunable_rag_evaluator/register.py +50 -0
  133. aiq/eval/utils/__init__.py +0 -0
  134. aiq/eval/utils/output_uploader.py +131 -0
  135. aiq/eval/utils/tqdm_position_registry.py +40 -0
  136. aiq/front_ends/__init__.py +14 -0
  137. aiq/front_ends/console/__init__.py +14 -0
  138. aiq/front_ends/console/console_front_end_config.py +32 -0
  139. aiq/front_ends/console/console_front_end_plugin.py +107 -0
  140. aiq/front_ends/console/register.py +25 -0
  141. aiq/front_ends/cron/__init__.py +14 -0
  142. aiq/front_ends/fastapi/__init__.py +14 -0
  143. aiq/front_ends/fastapi/fastapi_front_end_config.py +150 -0
  144. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +103 -0
  145. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +607 -0
  146. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
  147. aiq/front_ends/fastapi/job_store.py +161 -0
  148. aiq/front_ends/fastapi/main.py +70 -0
  149. aiq/front_ends/fastapi/message_handler.py +279 -0
  150. aiq/front_ends/fastapi/message_validator.py +345 -0
  151. aiq/front_ends/fastapi/register.py +25 -0
  152. aiq/front_ends/fastapi/response_helpers.py +195 -0
  153. aiq/front_ends/fastapi/step_adaptor.py +320 -0
  154. aiq/front_ends/fastapi/websocket.py +148 -0
  155. aiq/front_ends/mcp/__init__.py +14 -0
  156. aiq/front_ends/mcp/mcp_front_end_config.py +32 -0
  157. aiq/front_ends/mcp/mcp_front_end_plugin.py +93 -0
  158. aiq/front_ends/mcp/register.py +27 -0
  159. aiq/front_ends/mcp/tool_converter.py +242 -0
  160. aiq/front_ends/register.py +22 -0
  161. aiq/front_ends/simple_base/__init__.py +14 -0
  162. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +52 -0
  163. aiq/llm/__init__.py +0 -0
  164. aiq/llm/nim_llm.py +45 -0
  165. aiq/llm/openai_llm.py +45 -0
  166. aiq/llm/register.py +22 -0
  167. aiq/llm/utils/__init__.py +14 -0
  168. aiq/llm/utils/env_config_value.py +94 -0
  169. aiq/llm/utils/error.py +17 -0
  170. aiq/memory/__init__.py +20 -0
  171. aiq/memory/interfaces.py +183 -0
  172. aiq/memory/models.py +112 -0
  173. aiq/meta/module_to_distro.json +3 -0
  174. aiq/meta/pypi.md +58 -0
  175. aiq/observability/__init__.py +0 -0
  176. aiq/observability/async_otel_listener.py +429 -0
  177. aiq/observability/register.py +99 -0
  178. aiq/plugins/.namespace +1 -0
  179. aiq/profiler/__init__.py +0 -0
  180. aiq/profiler/callbacks/__init__.py +0 -0
  181. aiq/profiler/callbacks/agno_callback_handler.py +295 -0
  182. aiq/profiler/callbacks/base_callback_class.py +20 -0
  183. aiq/profiler/callbacks/langchain_callback_handler.py +278 -0
  184. aiq/profiler/callbacks/llama_index_callback_handler.py +205 -0
  185. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
  186. aiq/profiler/callbacks/token_usage_base_model.py +27 -0
  187. aiq/profiler/data_frame_row.py +51 -0
  188. aiq/profiler/decorators/__init__.py +0 -0
  189. aiq/profiler/decorators/framework_wrapper.py +131 -0
  190. aiq/profiler/decorators/function_tracking.py +254 -0
  191. aiq/profiler/forecasting/__init__.py +0 -0
  192. aiq/profiler/forecasting/config.py +18 -0
  193. aiq/profiler/forecasting/model_trainer.py +75 -0
  194. aiq/profiler/forecasting/models/__init__.py +22 -0
  195. aiq/profiler/forecasting/models/forecasting_base_model.py +40 -0
  196. aiq/profiler/forecasting/models/linear_model.py +196 -0
  197. aiq/profiler/forecasting/models/random_forest_regressor.py +268 -0
  198. aiq/profiler/inference_metrics_model.py +25 -0
  199. aiq/profiler/inference_optimization/__init__.py +0 -0
  200. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  201. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +452 -0
  202. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
  203. aiq/profiler/inference_optimization/data_models.py +386 -0
  204. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  205. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
  206. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
  207. aiq/profiler/inference_optimization/llm_metrics.py +212 -0
  208. aiq/profiler/inference_optimization/prompt_caching.py +163 -0
  209. aiq/profiler/inference_optimization/token_uniqueness.py +107 -0
  210. aiq/profiler/inference_optimization/workflow_runtimes.py +72 -0
  211. aiq/profiler/intermediate_property_adapter.py +102 -0
  212. aiq/profiler/profile_runner.py +433 -0
  213. aiq/profiler/utils.py +184 -0
  214. aiq/registry_handlers/__init__.py +0 -0
  215. aiq/registry_handlers/local/__init__.py +0 -0
  216. aiq/registry_handlers/local/local_handler.py +176 -0
  217. aiq/registry_handlers/local/register_local.py +37 -0
  218. aiq/registry_handlers/metadata_factory.py +60 -0
  219. aiq/registry_handlers/package_utils.py +198 -0
  220. aiq/registry_handlers/pypi/__init__.py +0 -0
  221. aiq/registry_handlers/pypi/pypi_handler.py +251 -0
  222. aiq/registry_handlers/pypi/register_pypi.py +40 -0
  223. aiq/registry_handlers/register.py +21 -0
  224. aiq/registry_handlers/registry_handler_base.py +157 -0
  225. aiq/registry_handlers/rest/__init__.py +0 -0
  226. aiq/registry_handlers/rest/register_rest.py +56 -0
  227. aiq/registry_handlers/rest/rest_handler.py +237 -0
  228. aiq/registry_handlers/schemas/__init__.py +0 -0
  229. aiq/registry_handlers/schemas/headers.py +42 -0
  230. aiq/registry_handlers/schemas/package.py +68 -0
  231. aiq/registry_handlers/schemas/publish.py +63 -0
  232. aiq/registry_handlers/schemas/pull.py +82 -0
  233. aiq/registry_handlers/schemas/remove.py +36 -0
  234. aiq/registry_handlers/schemas/search.py +91 -0
  235. aiq/registry_handlers/schemas/status.py +47 -0
  236. aiq/retriever/__init__.py +0 -0
  237. aiq/retriever/interface.py +37 -0
  238. aiq/retriever/milvus/__init__.py +14 -0
  239. aiq/retriever/milvus/register.py +81 -0
  240. aiq/retriever/milvus/retriever.py +228 -0
  241. aiq/retriever/models.py +74 -0
  242. aiq/retriever/nemo_retriever/__init__.py +14 -0
  243. aiq/retriever/nemo_retriever/register.py +60 -0
  244. aiq/retriever/nemo_retriever/retriever.py +190 -0
  245. aiq/retriever/register.py +22 -0
  246. aiq/runtime/__init__.py +14 -0
  247. aiq/runtime/loader.py +188 -0
  248. aiq/runtime/runner.py +176 -0
  249. aiq/runtime/session.py +140 -0
  250. aiq/runtime/user_metadata.py +131 -0
  251. aiq/settings/__init__.py +0 -0
  252. aiq/settings/global_settings.py +318 -0
  253. aiq/test/.namespace +1 -0
  254. aiq/tool/__init__.py +0 -0
  255. aiq/tool/code_execution/__init__.py +0 -0
  256. aiq/tool/code_execution/code_sandbox.py +188 -0
  257. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
  258. aiq/tool/code_execution/local_sandbox/__init__.py +13 -0
  259. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +83 -0
  260. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +4 -0
  261. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +25 -0
  262. aiq/tool/code_execution/register.py +70 -0
  263. aiq/tool/code_execution/utils.py +100 -0
  264. aiq/tool/datetime_tools.py +42 -0
  265. aiq/tool/document_search.py +141 -0
  266. aiq/tool/github_tools/__init__.py +0 -0
  267. aiq/tool/github_tools/create_github_commit.py +133 -0
  268. aiq/tool/github_tools/create_github_issue.py +87 -0
  269. aiq/tool/github_tools/create_github_pr.py +106 -0
  270. aiq/tool/github_tools/get_github_file.py +106 -0
  271. aiq/tool/github_tools/get_github_issue.py +166 -0
  272. aiq/tool/github_tools/get_github_pr.py +256 -0
  273. aiq/tool/github_tools/update_github_issue.py +100 -0
  274. aiq/tool/mcp/__init__.py +14 -0
  275. aiq/tool/mcp/mcp_client.py +220 -0
  276. aiq/tool/mcp/mcp_tool.py +95 -0
  277. aiq/tool/memory_tools/__init__.py +0 -0
  278. aiq/tool/memory_tools/add_memory_tool.py +79 -0
  279. aiq/tool/memory_tools/delete_memory_tool.py +67 -0
  280. aiq/tool/memory_tools/get_memory_tool.py +72 -0
  281. aiq/tool/nvidia_rag.py +95 -0
  282. aiq/tool/register.py +37 -0
  283. aiq/tool/retriever.py +89 -0
  284. aiq/tool/server_tools.py +63 -0
  285. aiq/utils/__init__.py +0 -0
  286. aiq/utils/data_models/__init__.py +0 -0
  287. aiq/utils/data_models/schema_validator.py +58 -0
  288. aiq/utils/debugging_utils.py +43 -0
  289. aiq/utils/exception_handlers/__init__.py +0 -0
  290. aiq/utils/exception_handlers/schemas.py +114 -0
  291. aiq/utils/io/__init__.py +0 -0
  292. aiq/utils/io/yaml_tools.py +119 -0
  293. aiq/utils/metadata_utils.py +74 -0
  294. aiq/utils/optional_imports.py +142 -0
  295. aiq/utils/producer_consumer_queue.py +178 -0
  296. aiq/utils/reactive/__init__.py +0 -0
  297. aiq/utils/reactive/base/__init__.py +0 -0
  298. aiq/utils/reactive/base/observable_base.py +65 -0
  299. aiq/utils/reactive/base/observer_base.py +55 -0
  300. aiq/utils/reactive/base/subject_base.py +79 -0
  301. aiq/utils/reactive/observable.py +59 -0
  302. aiq/utils/reactive/observer.py +76 -0
  303. aiq/utils/reactive/subject.py +131 -0
  304. aiq/utils/reactive/subscription.py +49 -0
  305. aiq/utils/settings/__init__.py +0 -0
  306. aiq/utils/settings/global_settings.py +197 -0
  307. aiq/utils/type_converter.py +232 -0
  308. aiq/utils/type_utils.py +397 -0
  309. aiq/utils/url_utils.py +27 -0
  310. aiqtoolkit-1.1.0.dist-info/METADATA +331 -0
  311. aiqtoolkit-1.1.0.dist-info/RECORD +316 -0
  312. aiqtoolkit-1.1.0.dist-info/WHEEL +5 -0
  313. aiqtoolkit-1.1.0.dist-info/entry_points.txt +17 -0
  314. aiqtoolkit-1.1.0.dist-info/licenses/LICENSE-3rd-party.txt +3686 -0
  315. aiqtoolkit-1.1.0.dist-info/licenses/LICENSE.md +201 -0
  316. aiqtoolkit-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,169 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import json
17
+
18
+ import pandas as pd
19
+
20
+ from aiq.data_models.dataset_handler import EvalDatasetConfig
21
+ from aiq.data_models.dataset_handler import EvalDatasetJsonConfig
22
+ from aiq.data_models.intermediate_step import IntermediateStep
23
+ from aiq.data_models.intermediate_step import IntermediateStepType
24
+ from aiq.eval.dataset_handler.dataset_downloader import DatasetDownloader
25
+ from aiq.eval.dataset_handler.dataset_filter import DatasetFilter
26
+ from aiq.eval.evaluator.evaluator_model import EvalInput
27
+ from aiq.eval.evaluator.evaluator_model import EvalInputItem
28
+
29
+
30
class DatasetHandler:
    """
    Read the datasets and pre-process (apply filters, deduplicate etc.) before turning them into EvalInput objects.
    One DatasetHandler object is needed for each dataset to be evaluated.
    """

    def __init__(self, dataset_config: EvalDatasetConfig, reps: int):
        """Initialize the handler for a single dataset.

        Args:
            dataset_config: Dataset configuration (file location, column/key structure, filters).
            reps: Number of times each row is replicated for repeated evaluation.
        """
        # Function-level import — presumably to avoid a circular import at
        # module load time; TODO confirm the cycle.
        from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

        self.dataset_config = dataset_config
        self.dataset_filter = DatasetFilter(dataset_config.filter)
        self.reps = reps
        # Helpers
        self.intermediate_step_adapter = IntermediateStepAdapter()

    def is_structured_input(self) -> bool:
        '''Check if the input is structured or unstructured'''
        # Structured mode is on unless explicitly disabled in the config.
        return not self.dataset_config.structure.disable

    @property
    def id_key(self) -> str:
        # Column holding the unique row id.
        return self.dataset_config.id_key

    @property
    def question_key(self) -> str:
        # Column holding the input question (structured mode only).
        return self.dataset_config.structure.question_key

    @property
    def answer_key(self) -> str:
        # Column holding the expected (reference) answer.
        return self.dataset_config.structure.answer_key

    @property
    def generated_answer_key(self) -> str:
        # Column holding the workflow-generated answer.
        return self.dataset_config.structure.generated_answer_key

    @property
    def trajectory_key(self) -> str:
        # Column holding the recorded intermediate steps.
        return self.dataset_config.structure.trajectory_key

    @property
    def expected_trajectory_key(self) -> str:
        # Column holding the expected intermediate steps.
        return self.dataset_config.structure.expected_trajectory_key

    def get_eval_input_from_df(self, input_df: pd.DataFrame) -> EvalInput:
        """Convert a pre-processed DataFrame into an EvalInput.

        In structured mode each configured column maps to an EvalInputItem
        field and rows without a question are dropped; in unstructured mode
        the whole row is serialized to JSON as the input object.
        """

        def create_eval_item(row: pd.Series, structured: bool) -> EvalInputItem:
            """Helper function to create EvalInputItem."""
            return EvalInputItem(
                id=row.get(self.id_key, ""),
                input_obj=row.to_json() if not structured else row.get(self.question_key, ""),
                expected_output_obj=row.get(self.answer_key, "") if structured else "",
                output_obj=row.get(self.generated_answer_key, "") if structured else "",
                trajectory=row.get(self.trajectory_key, []) if structured else [],
                expected_trajectory=row.get(self.expected_trajectory_key, []) if structured else [],
            )

        # if input dataframe is empty return an empty list
        if input_df.empty:
            return EvalInput(eval_input_items=[])

        structured = self.is_structured_input()
        if structured:
            # For structured input, question is mandatory. Ignore rows with missing or empty questions.
            # NOTE(review): assumes the question column is string-typed; .str.strip()
            # would raise on non-string values — confirm against upstream parsing.
            input_df = input_df[input_df[self.question_key].notnull() & input_df[self.question_key].str.strip().ne("")]
        eval_input_items = [create_eval_item(row, structured) for _, row in input_df.iterrows()]

        return EvalInput(eval_input_items=eval_input_items)

    def setup_reps(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """replicate the rows and update the id to id_key + "_rep" + rep_number"""
        # Replicate the rows
        input_df = pd.concat([input_df] * self.reps, ignore_index=True)
        # Compute repetition index (0-based count of each id's occurrences)
        rep_index = input_df.groupby(self.dataset_config.id_key).cumcount().astype(str)
        # Convert id_key to string (id can be integer) if needed and update IDs
        input_df[self.dataset_config.id_key] = input_df[self.dataset_config.id_key].astype(str) + "_rep" + rep_index
        # Ensure unique ID values after modification
        input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)

        return input_df

    def get_eval_input_from_dataset(self, dataset: str) -> EvalInput:
        """Read a dataset (path override or configured source), pre-process it,
        and convert it into an EvalInput."""

        # if a dataset file has been provided in the command line, use that
        dataset_config = EvalDatasetJsonConfig(file_path=dataset) if dataset else self.dataset_config

        # Download the dataset if it is remote
        downloader = DatasetDownloader(dataset_config=dataset_config)
        downloader.download_dataset()

        parser, kwargs = dataset_config.parser()
        # Parse the dataset into a DataFrame
        input_df = parser(dataset_config.file_path, **kwargs)

        # Apply filters and deduplicate
        input_df = self.dataset_filter.apply_filters(input_df)
        input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)

        # If more than one repetition is needed, replicate the rows
        if self.reps > 1:
            input_df = self.setup_reps(input_df)

        # Convert the DataFrame to a list of EvalInput objects
        return self.get_eval_input_from_df(input_df)

    def filter_intermediate_steps(self,
                                  intermediate_steps: list[IntermediateStep],
                                  event_filter: list[IntermediateStepType] | None = None) -> list[dict]:
        """
        Filter out the intermediate steps that are not relevant for evaluation.
        The output is written with the intention of re-running the evaluation using the original config file.
        """
        # Fall back to the adapter's default event filter when none is given.
        if event_filter is None:
            event_filter = self.intermediate_step_adapter.DEFAULT_EVENT_FILTER
        filtered_steps = self.intermediate_step_adapter.filter_intermediate_steps(intermediate_steps, event_filter)
        return self.intermediate_step_adapter.serialize_intermediate_steps(filtered_steps)

    def publish_eval_input(self,
                           eval_input: EvalInput,
                           workflow_output_step_filter: list[IntermediateStepType] | None = None) -> str:
        """
        Convert the EvalInput object to a JSON output for storing in a file. Use the original keys to
        allow re-running evaluation using the original config file and '--skip_workflow' option.
        """

        indent = 2
        if self.is_structured_input():
            # Extract structured data from EvalInputItems, keyed by the original column names
            data = [{
                self.id_key: item.id,
                self.question_key: item.input_obj,
                self.answer_key: item.expected_output_obj,
                self.generated_answer_key: item.output_obj,
                self.trajectory_key: self.filter_intermediate_steps(item.trajectory, workflow_output_step_filter),
                self.expected_trajectory_key: self.filter_intermediate_steps(item.expected_trajectory),
            } for item in eval_input.eval_input_items]
        else:
            # Unstructured case: return only raw output objects as a JSON array
            data = [json.loads(item.output_obj) for item in eval_input.eval_input_items]

        # default=str stringifies anything json can't serialize natively.
        return json.dumps(data, indent=indent, ensure_ascii=False, default=str)
aiq/eval/evaluate.py ADDED
@@ -0,0 +1,325 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import asyncio
17
+ import logging
18
+ import shutil
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ from pydantic import BaseModel
23
+ from tqdm import tqdm
24
+
25
+ from aiq.data_models.evaluate import EvalConfig
26
+ from aiq.eval.config import EvaluationRunConfig
27
+ from aiq.eval.config import EvaluationRunOutput
28
+ from aiq.eval.dataset_handler.dataset_handler import DatasetHandler
29
+ from aiq.eval.evaluator.evaluator_model import EvalInput
30
+ from aiq.eval.evaluator.evaluator_model import EvalInputItem
31
+ from aiq.eval.evaluator.evaluator_model import EvalOutput
32
+ from aiq.eval.utils.output_uploader import OutputUploader
33
+ from aiq.runtime.session import AIQSessionManager
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
+ class EvaluationRun: # pylint: disable=too-many-public-methods
39
+ """
40
+ Instantiated for each evaluation run and used to store data for that single run.
41
+ """
42
+
43
+ def __init__(self, config: EvaluationRunConfig):
44
+ """
45
+ Initialize an EvaluationRun with configuration.
46
+ """
47
+ from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
48
+
49
+ # Run-specific configuration
50
+ self.config: EvaluationRunConfig = config
51
+ self.eval_config: EvalConfig | None = None
52
+
53
+ # Helpers
54
+ self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
55
+
56
+ # Metadata
57
+ self.eval_input: EvalInput | None = None
58
+ self.workflow_interrupted: bool = False
59
+
60
+ # evaluation_results is list of tuples (evaluator_name, EvalOutput)
61
+ self.evaluation_results: list[tuple[str, EvalOutput]] = []
62
+
63
+ # workflow output file
64
+ self.workflow_output_file: Path | None = None
65
+
66
+ # evaluation output files
67
+ self.evaluator_output_files: list[Path] = []
68
+
69
    async def run_workflow_local(self, session_manager: AIQSessionManager):
        '''
        Launch the workflow with the specified questions and extract the output using the jsonpath.

        Runs all (remaining) eval input items concurrently via asyncio.gather,
        filling each item's output_obj and trajectory in place. On the first
        workflow error, sets a stop event so items not yet started are skipped,
        and marks the run as interrupted.
        '''
        # import function level dependencies
        from jsonpath_ng import parse

        from aiq.eval.runtime_event_subscriber import pull_intermediate

        # Run the workflow
        jsonpath_expr = parse(self.config.result_json_path)
        stop_event = asyncio.Event()

        async def run_one(item: EvalInputItem):
            # Skip items queued after a previous item failed.
            if stop_event.is_set():
                return "", []

            async with session_manager.run(item.input_obj) as runner:
                try:
                    # Start usage stats and intermediate steps collection in parallel
                    intermediate_future = pull_intermediate()

                    if session_manager.workflow.has_single_output:
                        base_output = await runner.result()
                    else:
                        # raise an error if the workflow has multiple outputs
                        raise NotImplementedError("Multiple outputs are not supported")
                    intermediate_steps = await intermediate_future
                except NotImplementedError as e:
                    # raise original error
                    raise e
                except Exception as e:
                    logger.exception("Failed to run the workflow: %s", e, exc_info=True)
                    # stop processing if a workflow error occurs; item is left
                    # without an output so a re-run can pick it up.
                    self.workflow_interrupted = True
                    stop_event.set()
                    return

                # Best-effort conversion to str; keep the raw output if the
                # runner cannot convert it.
                try:
                    base_output = runner.convert(base_output, to_type=str)
                except ValueError:
                    pass

                # if base_output is a pydantic model dump it to json
                if isinstance(base_output, BaseModel):
                    output = base_output.model_dump_json(indent=2)
                else:
                    m = jsonpath_expr.find(base_output)
                    if (not m):
                        raise RuntimeError(f"Failed to extract output using jsonpath: {self.config.result_json_path}")
                    if (len(m) > 1):
                        logger.warning("Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
                                       base_output,
                                       m)
                    output = m[0].value

                # Record results directly on the eval input item.
                item.output_obj = output
                item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)

        async def wrapped_run(item: EvalInputItem) -> None:
            # pbar is a closure over the progress bar created below, before
            # any wrapped_run coroutine is awaited.
            await run_one(item)
            pbar.update(1)

        # if self.config.skip_complete is set skip eval_input_items with a non-empty output_obj
        if self.config.skip_completed_entries:
            eval_input_items = [item for item in self.eval_input.eval_input_items if not item.output_obj]
            if not eval_input_items:
                logger.warning("All items have a non-empty output. Skipping workflow pass altogether.")
                return
        else:
            eval_input_items = self.eval_input.eval_input_items
        pbar = tqdm(total=len(eval_input_items), desc="Running workflow")
        await asyncio.gather(*[wrapped_run(item) for item in eval_input_items])
        pbar.close()
143
+
144
+ async def run_workflow_remote(self):
145
+ from aiq.eval.remote_workflow import EvaluationRemoteWorkflowHandler
146
+ handler = EvaluationRemoteWorkflowHandler(self.config, self.eval_config.general.max_concurrency)
147
+ await handler.run_workflow_remote(self.eval_input)
148
+
149
+ async def profile_workflow(self):
150
+ """
151
+ Profile a dataset
152
+ """
153
+
154
+ if not self.eval_config.general.profiler:
155
+ logger.info("Profiler is not enabled. Skipping profiling.")
156
+ return
157
+
158
+ from aiq.profiler.profile_runner import ProfilerRunner
159
+
160
+ all_stats = []
161
+ for input_item in self.eval_input.eval_input_items:
162
+ all_stats.append(input_item.trajectory)
163
+
164
+ profiler_runner = ProfilerRunner(self.eval_config.general.profiler, self.eval_config.general.output_dir)
165
+
166
+ await profiler_runner.run(all_stats)
167
+
168
+ def cleanup_output_directory(self):
169
+ '''Remove contents of the output directory if it exists'''
170
+ if self.eval_config.general.output and self.eval_config.general.output.dir and \
171
+ self.eval_config.general.output.dir.exists():
172
+ logger.info("Cleaning up output directory %s", self.eval_config.general.output.dir)
173
+ shutil.rmtree(self.eval_config.general.output.dir)
174
+
175
+ def write_output(self, dataset_handler: DatasetHandler):
176
+ workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
177
+ workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
178
+
179
+ # Write the workflow output to a file (this can be used for re-running the evaluation)
180
+
181
+ step_filter = self.eval_config.general.output.workflow_output_step_filter \
182
+ if self.eval_config.general.output else None
183
+ workflow_output = dataset_handler.publish_eval_input(self.eval_input, step_filter)
184
+ with open(workflow_output_file, "w", encoding="utf-8") as f:
185
+ # set indent to 2 for pretty printing
186
+ f.write(workflow_output)
187
+ self.workflow_output_file = workflow_output_file
188
+ logger.info("Workflow output written to %s", workflow_output_file)
189
+
190
+ # Write the output of each evaluator to a separate json file
191
+ for evaluator_name, eval_output in self.evaluation_results:
192
+ output_file = self.eval_config.general.output_dir / f"{evaluator_name}_output.json"
193
+ output_file.parent.mkdir(parents=True, exist_ok=True)
194
+ # create json content using the evaluation results
195
+ output = eval_output.model_dump_json(indent=2)
196
+ with open(output_file, "w", encoding="utf-8") as f:
197
+ f.write(output)
198
+ self.evaluator_output_files.append(output_file)
199
+ logger.info("Evaluation results written to %s", output_file)
200
+
201
+ if self.workflow_interrupted:
202
+ # Issue a warning if the workflow was not completed on all datasets
203
+ msg = ("Workflow execution was interrupted due to an error. The results may be incomplete. "
204
+ "You can re-execute evaluation for incomplete results by running "
205
+ "`eval` with the --skip_completed_entries flag.")
206
+ logger.warning(msg)
207
+
208
+ async def run_single_evaluator(self, evaluator_name: str, evaluator: Any):
209
+ """Run a single evaluator and store its results."""
210
+ try:
211
+ eval_output = await evaluator.evaluate_fn(self.eval_input)
212
+ self.evaluation_results.append((evaluator_name, eval_output))
213
+ except Exception as e:
214
+ logger.exception("An error occurred while running evaluator %s: %s", evaluator_name, e, exc_info=True)
215
+
216
+ async def run_evaluators(self, evaluators: dict[str, Any]):
217
+ """Run all configured evaluators asynchronously."""
218
+ tasks = [self.run_single_evaluator(name, evaluator) for name, evaluator in evaluators.items() if evaluator]
219
+
220
+ if not tasks:
221
+ logger.warning("All evaluators were empty or invalid.")
222
+ return
223
+
224
+ try:
225
+ await asyncio.gather(*tasks)
226
+ except Exception as e:
227
+ logger.exception("An error occurred while running evaluators: %s", e, exc_info=True)
228
+ raise
229
+
230
+ def apply_overrides(self):
231
+ from aiq.cli.cli_utils.config_override import load_and_override_config
232
+ from aiq.data_models.config import AIQConfig
233
+ from aiq.runtime.loader import PluginTypes
234
+ from aiq.runtime.loader import discover_and_register_plugins
235
+ from aiq.utils.data_models.schema_validator import validate_schema
236
+
237
+ # Register plugins before validation
238
+ discover_and_register_plugins(PluginTypes.CONFIG_OBJECT)
239
+
240
+ config_dict = load_and_override_config(self.config.config_file, self.config.override)
241
+ config = validate_schema(config_dict, AIQConfig)
242
+ return config
243
+
244
    async def run_and_evaluate(self,
                               session_manager: AIQSessionManager | None = None,
                               job_id: str | None = None) -> EvaluationRunOutput:
        """
        Run the workflow with the specified config file and evaluate the dataset.

        Args:
            session_manager: Optional pre-built session manager; when None and
                the workflow is run locally, one is created from the built
                workflow with the configured max concurrency.
            job_id: Optional job identifier; when given, all output is written
                under a per-job subdirectory (``jobs/<job_id>``).

        Returns:
            EvaluationRunOutput with the workflow output file path, the list of
            per-evaluator output files, and whether the workflow was interrupted.
            Returns early (with whatever state is currently held) when no
            dataset is configured or the dataset is empty.
        """
        logger.info("Starting evaluation run with config file: %s", self.config.config_file)

        from aiq.builder.eval_builder import WorkflowEvalBuilder
        from aiq.runtime.loader import load_config

        # Load the config, applying CLI overrides only when any were given
        if self.config.override:
            config = self.apply_overrides()
        else:
            config = load_config(self.config.config_file)
        self.eval_config = config.eval
        logger.debug("Loaded evaluation configuration: %s", self.eval_config)

        # Cleanup the output directory (only when the config opts in via `cleanup`)
        if self.eval_config.general.output and self.eval_config.general.output.cleanup:
            self.cleanup_output_directory()

        # If a job id is provided keep the data per-job by redirecting both
        # output_dir and output.dir to a jobs/<job_id> subdirectory
        if job_id:
            self.eval_config.general.output_dir = self.eval_config.general.output_dir / f"jobs/{job_id}"
            if self.eval_config.general.output:
                self.eval_config.general.output.dir = self.eval_config.general.output_dir

        # Load the input dataset
        # For multiple datasets, one handler per dataset can be created
        dataset_config = self.eval_config.general.dataset  # Currently only one dataset is supported
        if not dataset_config:
            logger.info("No dataset found, nothing to evaluate")
            return EvaluationRunOutput(
                workflow_output_file=self.workflow_output_file,
                evaluator_output_files=self.evaluator_output_files,
                workflow_interrupted=self.workflow_interrupted,
            )

        dataset_handler = DatasetHandler(dataset_config=dataset_config, reps=self.config.reps)
        self.eval_input = dataset_handler.get_eval_input_from_dataset(self.config.dataset)
        if not self.eval_input.eval_input_items:
            logger.info("Dataset is empty. Nothing to evaluate.")
            return EvaluationRunOutput(
                workflow_output_file=self.workflow_output_file,
                evaluator_output_files=self.evaluator_output_files,
                workflow_interrupted=self.workflow_interrupted,
            )

        # Run workflow and evaluate inside the builder context so evaluators
        # remain resolvable while they run
        async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
            if self.config.endpoint:
                # A remote endpoint was configured: run the workflow there
                await self.run_workflow_remote()
            else:
                if not self.config.skip_workflow:
                    if session_manager is None:
                        session_manager = AIQSessionManager(eval_workflow.build(),
                                                            max_concurrency=self.eval_config.general.max_concurrency)
                    await self.run_workflow_local(session_manager)

            # Evaluate with every evaluator named in the config
            evaluators = {name: eval_workflow.get_evaluator(name) for name in self.eval_config.evaluators}
            await self.run_evaluators(evaluators)

        # Profile the workflow (no-op unless profiling is configured)
        await self.profile_workflow()

        # Write the results to the output directory
        self.write_output(dataset_handler)

        # Run custom scripts and upload evaluation outputs to S3
        if self.eval_config.general.output:
            output_uploader = OutputUploader(self.eval_config.general.output, job_id=job_id)
            output_uploader.run_custom_scripts()
            await output_uploader.upload_directory()

        return EvaluationRunOutput(
            workflow_output_file=self.workflow_output_file,
            evaluator_output_files=self.evaluator_output_files,
            workflow_interrupted=self.workflow_interrupted,
        )
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
@@ -0,0 +1,44 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import typing
17
+
18
+ from pydantic import BaseModel
19
+
20
+ from aiq.data_models.intermediate_step import IntermediateStep
21
+
22
+
23
class EvalInputItem(BaseModel):
    """One dataset entry to evaluate: the workflow's input, outputs, and trajectories."""
    # Identifier for this entry; type is dataset-defined
    id: typing.Any
    # Input object for the workflow run
    input_obj: typing.Any
    # Expected (ground-truth) output for this input
    expected_output_obj: typing.Any
    # Output actually produced by the workflow
    output_obj: typing.Any
    # Intermediate steps expected for this entry
    expected_trajectory: list[IntermediateStep]
    # Intermediate steps recorded during the actual run
    trajectory: list[IntermediateStep]
30
+
31
+
32
class EvalInput(BaseModel):
    """Container for all dataset entries passed to the evaluators."""
    eval_input_items: list[EvalInputItem]
34
+
35
+
36
class EvalOutputItem(BaseModel):
    """Per-entry result produced by an evaluator."""
    id: typing.Any  # id or input_obj from EvalInputItem
    score: typing.Any  # float or any serializable type
    reasoning: typing.Any  # evaluator's explanation for the score
40
+
41
+
42
class EvalOutput(BaseModel):
    """Aggregate evaluator result: an average score plus the per-entry items."""
    average_score: typing.Any  # float or any serializable type
    eval_output_items: list[EvalOutputItem]
@@ -0,0 +1,93 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+
18
+ from langchain_core.agents import AgentAction
19
+
20
+ from aiq.data_models.intermediate_step import IntermediateStep
21
+ from aiq.data_models.intermediate_step import IntermediateStepType
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class IntermediateStepAdapter:
    """Converts AIQ intermediate steps between dict, model, and LangChain AgentAction forms."""

    # Events kept by default when callers do not supply their own filter
    DEFAULT_EVENT_FILTER = [IntermediateStepType.LLM_END, IntermediateStepType.TOOL_END]

    def filter_intermediate_steps(self,
                                  intermediate_steps: list[IntermediateStep],
                                  event_filter: list[IntermediateStepType]) -> list[IntermediateStep]:
        """Return only steps whose event type is in event_filter; all steps if the filter is empty."""
        if not event_filter:
            return intermediate_steps
        return list(filter(lambda step: step.event_type in event_filter, intermediate_steps))

    def validate_intermediate_steps(self, intermediate_steps: list[dict]) -> list[IntermediateStep]:
        """Validate raw dicts into IntermediateStep models, logging and skipping any that fail."""
        validated: list[IntermediateStep] = []
        for raw in intermediate_steps:
            try:
                validated.append(IntermediateStep.model_validate(raw))
            except Exception as err:
                logger.exception("Validation failed for step: %r, Error: %s", raw, err, exc_info=True)
        return validated

    def serialize_intermediate_steps(self, intermediate_steps: list[IntermediateStep]) -> list[dict]:
        """Converts a list of IntermediateStep objects to a list of dictionaries."""
        serialized = []
        for item in intermediate_steps:
            serialized.append(item.model_dump())
        return serialized

    @staticmethod
    def agent_action_to_dict(action) -> dict:
        """Convert AgentAction to a JSON-serializable dictionary."""
        return {field: getattr(action, field) for field in ("tool", "tool_input", "log", "type")}

    def get_agent_action_single(self, step: IntermediateStep,
                                last_llm_end_step: IntermediateStep | None) -> tuple[AgentAction, str]:
        """Converts a single intermediate step to Tuple[AgentAction, str].

        The action's log is taken from the output of the most recent LLM_END
        step, when one has been seen.
        """
        if last_llm_end_step is not None:
            log = getattr(last_llm_end_step.data, "output", "")
        else:
            log = ""

        if step.data:
            tool_input = getattr(step.data, "input", "")
            tool_output = getattr(step.data, "output", "")
        else:
            tool_input = ""
            tool_output = ""

        action = AgentAction(tool=step.name or "", tool_input=tool_input, log=log)
        return action, tool_output

    def get_agent_actions(self, intermediate_steps: list[IntermediateStep],
                          event_filter: list[IntermediateStepType]) -> list[tuple[AgentAction, str]]:
        """Converts a list of intermediate steps to a list of (AgentAction, output)."""
        pairs: list[tuple[AgentAction, str]] = []
        previous_llm_end: IntermediateStep | None = None

        for current in self.filter_intermediate_steps(intermediate_steps, event_filter):
            if current.event_type == IntermediateStepType.LLM_END:
                # Remember the latest LLM output so the next tool step can use it as its log
                previous_llm_end = current
            else:
                pairs.append(self.get_agent_action_single(current, previous_llm_end))

        return pairs

    def get_context(self, intermediate_steps: list[IntermediateStep]) -> list[str]:
        """Grab the output of all the tools and return them as retrieved context."""
        context: list[str] = []
        for step in intermediate_steps:
            if step.event_type != IntermediateStepType.TOOL_END:
                continue
            if step.data and step.data.output:
                context.append(str(step.data.output))
        return context
File without changes