nvidia-nat 1.2.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. aiq/agent/__init__.py +0 -0
  2. aiq/agent/base.py +239 -0
  3. aiq/agent/dual_node.py +67 -0
  4. aiq/agent/react_agent/__init__.py +0 -0
  5. aiq/agent/react_agent/agent.py +355 -0
  6. aiq/agent/react_agent/output_parser.py +104 -0
  7. aiq/agent/react_agent/prompt.py +41 -0
  8. aiq/agent/react_agent/register.py +149 -0
  9. aiq/agent/reasoning_agent/__init__.py +0 -0
  10. aiq/agent/reasoning_agent/reasoning_agent.py +225 -0
  11. aiq/agent/register.py +23 -0
  12. aiq/agent/rewoo_agent/__init__.py +0 -0
  13. aiq/agent/rewoo_agent/agent.py +411 -0
  14. aiq/agent/rewoo_agent/prompt.py +108 -0
  15. aiq/agent/rewoo_agent/register.py +158 -0
  16. aiq/agent/tool_calling_agent/__init__.py +0 -0
  17. aiq/agent/tool_calling_agent/agent.py +119 -0
  18. aiq/agent/tool_calling_agent/register.py +106 -0
  19. aiq/authentication/__init__.py +14 -0
  20. aiq/authentication/api_key/__init__.py +14 -0
  21. aiq/authentication/api_key/api_key_auth_provider.py +96 -0
  22. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  23. aiq/authentication/api_key/register.py +26 -0
  24. aiq/authentication/exceptions/__init__.py +14 -0
  25. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  26. aiq/authentication/http_basic_auth/__init__.py +0 -0
  27. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  28. aiq/authentication/http_basic_auth/register.py +30 -0
  29. aiq/authentication/interfaces.py +93 -0
  30. aiq/authentication/oauth2/__init__.py +14 -0
  31. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  32. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  33. aiq/authentication/oauth2/register.py +25 -0
  34. aiq/authentication/register.py +21 -0
  35. aiq/builder/__init__.py +0 -0
  36. aiq/builder/builder.py +285 -0
  37. aiq/builder/component_utils.py +316 -0
  38. aiq/builder/context.py +264 -0
  39. aiq/builder/embedder.py +24 -0
  40. aiq/builder/eval_builder.py +161 -0
  41. aiq/builder/evaluator.py +29 -0
  42. aiq/builder/framework_enum.py +24 -0
  43. aiq/builder/front_end.py +73 -0
  44. aiq/builder/function.py +344 -0
  45. aiq/builder/function_base.py +380 -0
  46. aiq/builder/function_info.py +627 -0
  47. aiq/builder/intermediate_step_manager.py +174 -0
  48. aiq/builder/llm.py +25 -0
  49. aiq/builder/retriever.py +25 -0
  50. aiq/builder/user_interaction_manager.py +74 -0
  51. aiq/builder/workflow.py +148 -0
  52. aiq/builder/workflow_builder.py +1117 -0
  53. aiq/cli/__init__.py +14 -0
  54. aiq/cli/cli_utils/__init__.py +0 -0
  55. aiq/cli/cli_utils/config_override.py +231 -0
  56. aiq/cli/cli_utils/validation.py +37 -0
  57. aiq/cli/commands/__init__.py +0 -0
  58. aiq/cli/commands/configure/__init__.py +0 -0
  59. aiq/cli/commands/configure/channel/__init__.py +0 -0
  60. aiq/cli/commands/configure/channel/add.py +28 -0
  61. aiq/cli/commands/configure/channel/channel.py +36 -0
  62. aiq/cli/commands/configure/channel/remove.py +30 -0
  63. aiq/cli/commands/configure/channel/update.py +30 -0
  64. aiq/cli/commands/configure/configure.py +33 -0
  65. aiq/cli/commands/evaluate.py +139 -0
  66. aiq/cli/commands/info/__init__.py +14 -0
  67. aiq/cli/commands/info/info.py +39 -0
  68. aiq/cli/commands/info/list_channels.py +32 -0
  69. aiq/cli/commands/info/list_components.py +129 -0
  70. aiq/cli/commands/info/list_mcp.py +213 -0
  71. aiq/cli/commands/registry/__init__.py +14 -0
  72. aiq/cli/commands/registry/publish.py +88 -0
  73. aiq/cli/commands/registry/pull.py +118 -0
  74. aiq/cli/commands/registry/registry.py +38 -0
  75. aiq/cli/commands/registry/remove.py +108 -0
  76. aiq/cli/commands/registry/search.py +155 -0
  77. aiq/cli/commands/sizing/__init__.py +14 -0
  78. aiq/cli/commands/sizing/calc.py +297 -0
  79. aiq/cli/commands/sizing/sizing.py +27 -0
  80. aiq/cli/commands/start.py +246 -0
  81. aiq/cli/commands/uninstall.py +81 -0
  82. aiq/cli/commands/validate.py +47 -0
  83. aiq/cli/commands/workflow/__init__.py +14 -0
  84. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  85. aiq/cli/commands/workflow/templates/config.yml.j2 +16 -0
  86. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
  87. aiq/cli/commands/workflow/templates/register.py.j2 +5 -0
  88. aiq/cli/commands/workflow/templates/workflow.py.j2 +36 -0
  89. aiq/cli/commands/workflow/workflow.py +37 -0
  90. aiq/cli/commands/workflow/workflow_commands.py +313 -0
  91. aiq/cli/entrypoint.py +135 -0
  92. aiq/cli/main.py +44 -0
  93. aiq/cli/register_workflow.py +488 -0
  94. aiq/cli/type_registry.py +1000 -0
  95. aiq/data_models/__init__.py +14 -0
  96. aiq/data_models/api_server.py +694 -0
  97. aiq/data_models/authentication.py +231 -0
  98. aiq/data_models/common.py +171 -0
  99. aiq/data_models/component.py +54 -0
  100. aiq/data_models/component_ref.py +168 -0
  101. aiq/data_models/config.py +406 -0
  102. aiq/data_models/dataset_handler.py +123 -0
  103. aiq/data_models/discovery_metadata.py +335 -0
  104. aiq/data_models/embedder.py +27 -0
  105. aiq/data_models/evaluate.py +127 -0
  106. aiq/data_models/evaluator.py +26 -0
  107. aiq/data_models/front_end.py +26 -0
  108. aiq/data_models/function.py +30 -0
  109. aiq/data_models/function_dependencies.py +72 -0
  110. aiq/data_models/interactive.py +246 -0
  111. aiq/data_models/intermediate_step.py +302 -0
  112. aiq/data_models/invocation_node.py +38 -0
  113. aiq/data_models/llm.py +27 -0
  114. aiq/data_models/logging.py +26 -0
  115. aiq/data_models/memory.py +27 -0
  116. aiq/data_models/object_store.py +44 -0
  117. aiq/data_models/profiler.py +54 -0
  118. aiq/data_models/registry_handler.py +26 -0
  119. aiq/data_models/retriever.py +30 -0
  120. aiq/data_models/retry_mixin.py +35 -0
  121. aiq/data_models/span.py +187 -0
  122. aiq/data_models/step_adaptor.py +64 -0
  123. aiq/data_models/streaming.py +33 -0
  124. aiq/data_models/swe_bench_model.py +54 -0
  125. aiq/data_models/telemetry_exporter.py +26 -0
  126. aiq/data_models/ttc_strategy.py +30 -0
  127. aiq/embedder/__init__.py +0 -0
  128. aiq/embedder/langchain_client.py +41 -0
  129. aiq/embedder/nim_embedder.py +59 -0
  130. aiq/embedder/openai_embedder.py +43 -0
  131. aiq/embedder/register.py +24 -0
  132. aiq/eval/__init__.py +14 -0
  133. aiq/eval/config.py +60 -0
  134. aiq/eval/dataset_handler/__init__.py +0 -0
  135. aiq/eval/dataset_handler/dataset_downloader.py +106 -0
  136. aiq/eval/dataset_handler/dataset_filter.py +52 -0
  137. aiq/eval/dataset_handler/dataset_handler.py +254 -0
  138. aiq/eval/evaluate.py +506 -0
  139. aiq/eval/evaluator/__init__.py +14 -0
  140. aiq/eval/evaluator/base_evaluator.py +73 -0
  141. aiq/eval/evaluator/evaluator_model.py +45 -0
  142. aiq/eval/intermediate_step_adapter.py +99 -0
  143. aiq/eval/rag_evaluator/__init__.py +0 -0
  144. aiq/eval/rag_evaluator/evaluate.py +178 -0
  145. aiq/eval/rag_evaluator/register.py +143 -0
  146. aiq/eval/register.py +23 -0
  147. aiq/eval/remote_workflow.py +133 -0
  148. aiq/eval/runners/__init__.py +14 -0
  149. aiq/eval/runners/config.py +39 -0
  150. aiq/eval/runners/multi_eval_runner.py +54 -0
  151. aiq/eval/runtime_event_subscriber.py +52 -0
  152. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  153. aiq/eval/swe_bench_evaluator/evaluate.py +215 -0
  154. aiq/eval/swe_bench_evaluator/register.py +36 -0
  155. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  156. aiq/eval/trajectory_evaluator/evaluate.py +75 -0
  157. aiq/eval/trajectory_evaluator/register.py +40 -0
  158. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  159. aiq/eval/tunable_rag_evaluator/evaluate.py +245 -0
  160. aiq/eval/tunable_rag_evaluator/register.py +52 -0
  161. aiq/eval/usage_stats.py +41 -0
  162. aiq/eval/utils/__init__.py +0 -0
  163. aiq/eval/utils/output_uploader.py +140 -0
  164. aiq/eval/utils/tqdm_position_registry.py +40 -0
  165. aiq/eval/utils/weave_eval.py +184 -0
  166. aiq/experimental/__init__.py +0 -0
  167. aiq/experimental/decorators/__init__.py +0 -0
  168. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  169. aiq/experimental/test_time_compute/__init__.py +0 -0
  170. aiq/experimental/test_time_compute/editing/__init__.py +0 -0
  171. aiq/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
  172. aiq/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
  173. aiq/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
  174. aiq/experimental/test_time_compute/functions/__init__.py +0 -0
  175. aiq/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
  176. aiq/experimental/test_time_compute/functions/its_tool_orchestration_function.py +205 -0
  177. aiq/experimental/test_time_compute/functions/its_tool_wrapper_function.py +146 -0
  178. aiq/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
  179. aiq/experimental/test_time_compute/models/__init__.py +0 -0
  180. aiq/experimental/test_time_compute/models/editor_config.py +132 -0
  181. aiq/experimental/test_time_compute/models/scoring_config.py +112 -0
  182. aiq/experimental/test_time_compute/models/search_config.py +120 -0
  183. aiq/experimental/test_time_compute/models/selection_config.py +154 -0
  184. aiq/experimental/test_time_compute/models/stage_enums.py +43 -0
  185. aiq/experimental/test_time_compute/models/strategy_base.py +66 -0
  186. aiq/experimental/test_time_compute/models/tool_use_config.py +41 -0
  187. aiq/experimental/test_time_compute/models/ttc_item.py +48 -0
  188. aiq/experimental/test_time_compute/register.py +36 -0
  189. aiq/experimental/test_time_compute/scoring/__init__.py +0 -0
  190. aiq/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
  191. aiq/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
  192. aiq/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
  193. aiq/experimental/test_time_compute/search/__init__.py +0 -0
  194. aiq/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
  195. aiq/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
  196. aiq/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
  197. aiq/experimental/test_time_compute/selection/__init__.py +0 -0
  198. aiq/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
  199. aiq/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
  200. aiq/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
  201. aiq/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
  202. aiq/experimental/test_time_compute/selection/threshold_selector.py +58 -0
  203. aiq/front_ends/__init__.py +14 -0
  204. aiq/front_ends/console/__init__.py +14 -0
  205. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  206. aiq/front_ends/console/console_front_end_config.py +32 -0
  207. aiq/front_ends/console/console_front_end_plugin.py +96 -0
  208. aiq/front_ends/console/register.py +25 -0
  209. aiq/front_ends/cron/__init__.py +14 -0
  210. aiq/front_ends/fastapi/__init__.py +14 -0
  211. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  212. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  213. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  214. aiq/front_ends/fastapi/fastapi_front_end_config.py +234 -0
  215. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  216. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
  217. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1092 -0
  218. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  219. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  220. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
  221. aiq/front_ends/fastapi/job_store.py +183 -0
  222. aiq/front_ends/fastapi/main.py +72 -0
  223. aiq/front_ends/fastapi/message_handler.py +298 -0
  224. aiq/front_ends/fastapi/message_validator.py +345 -0
  225. aiq/front_ends/fastapi/register.py +25 -0
  226. aiq/front_ends/fastapi/response_helpers.py +195 -0
  227. aiq/front_ends/fastapi/step_adaptor.py +321 -0
  228. aiq/front_ends/mcp/__init__.py +14 -0
  229. aiq/front_ends/mcp/mcp_front_end_config.py +32 -0
  230. aiq/front_ends/mcp/mcp_front_end_plugin.py +93 -0
  231. aiq/front_ends/mcp/register.py +27 -0
  232. aiq/front_ends/mcp/tool_converter.py +242 -0
  233. aiq/front_ends/register.py +22 -0
  234. aiq/front_ends/simple_base/__init__.py +14 -0
  235. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
  236. aiq/llm/__init__.py +0 -0
  237. aiq/llm/aws_bedrock_llm.py +57 -0
  238. aiq/llm/nim_llm.py +46 -0
  239. aiq/llm/openai_llm.py +46 -0
  240. aiq/llm/register.py +23 -0
  241. aiq/llm/utils/__init__.py +14 -0
  242. aiq/llm/utils/env_config_value.py +94 -0
  243. aiq/llm/utils/error.py +17 -0
  244. aiq/memory/__init__.py +20 -0
  245. aiq/memory/interfaces.py +183 -0
  246. aiq/memory/models.py +112 -0
  247. aiq/meta/module_to_distro.json +3 -0
  248. aiq/meta/pypi.md +58 -0
  249. aiq/object_store/__init__.py +20 -0
  250. aiq/object_store/in_memory_object_store.py +76 -0
  251. aiq/object_store/interfaces.py +84 -0
  252. aiq/object_store/models.py +36 -0
  253. aiq/object_store/register.py +20 -0
  254. aiq/observability/__init__.py +14 -0
  255. aiq/observability/exporter/__init__.py +14 -0
  256. aiq/observability/exporter/base_exporter.py +449 -0
  257. aiq/observability/exporter/exporter.py +78 -0
  258. aiq/observability/exporter/file_exporter.py +33 -0
  259. aiq/observability/exporter/processing_exporter.py +322 -0
  260. aiq/observability/exporter/raw_exporter.py +52 -0
  261. aiq/observability/exporter/span_exporter.py +265 -0
  262. aiq/observability/exporter_manager.py +335 -0
  263. aiq/observability/mixin/__init__.py +14 -0
  264. aiq/observability/mixin/batch_config_mixin.py +26 -0
  265. aiq/observability/mixin/collector_config_mixin.py +23 -0
  266. aiq/observability/mixin/file_mixin.py +288 -0
  267. aiq/observability/mixin/file_mode.py +23 -0
  268. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  269. aiq/observability/mixin/serialize_mixin.py +61 -0
  270. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  271. aiq/observability/processor/__init__.py +14 -0
  272. aiq/observability/processor/batching_processor.py +310 -0
  273. aiq/observability/processor/callback_processor.py +42 -0
  274. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  275. aiq/observability/processor/processor.py +71 -0
  276. aiq/observability/register.py +96 -0
  277. aiq/observability/utils/__init__.py +14 -0
  278. aiq/observability/utils/dict_utils.py +236 -0
  279. aiq/observability/utils/time_utils.py +31 -0
  280. aiq/plugins/.namespace +1 -0
  281. aiq/profiler/__init__.py +0 -0
  282. aiq/profiler/calc/__init__.py +14 -0
  283. aiq/profiler/calc/calc_runner.py +627 -0
  284. aiq/profiler/calc/calculations.py +288 -0
  285. aiq/profiler/calc/data_models.py +188 -0
  286. aiq/profiler/calc/plot.py +345 -0
  287. aiq/profiler/callbacks/__init__.py +0 -0
  288. aiq/profiler/callbacks/agno_callback_handler.py +295 -0
  289. aiq/profiler/callbacks/base_callback_class.py +20 -0
  290. aiq/profiler/callbacks/langchain_callback_handler.py +290 -0
  291. aiq/profiler/callbacks/llama_index_callback_handler.py +205 -0
  292. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
  293. aiq/profiler/callbacks/token_usage_base_model.py +27 -0
  294. aiq/profiler/data_frame_row.py +51 -0
  295. aiq/profiler/data_models.py +24 -0
  296. aiq/profiler/decorators/__init__.py +0 -0
  297. aiq/profiler/decorators/framework_wrapper.py +131 -0
  298. aiq/profiler/decorators/function_tracking.py +254 -0
  299. aiq/profiler/forecasting/__init__.py +0 -0
  300. aiq/profiler/forecasting/config.py +18 -0
  301. aiq/profiler/forecasting/model_trainer.py +75 -0
  302. aiq/profiler/forecasting/models/__init__.py +22 -0
  303. aiq/profiler/forecasting/models/forecasting_base_model.py +40 -0
  304. aiq/profiler/forecasting/models/linear_model.py +196 -0
  305. aiq/profiler/forecasting/models/random_forest_regressor.py +268 -0
  306. aiq/profiler/inference_metrics_model.py +28 -0
  307. aiq/profiler/inference_optimization/__init__.py +0 -0
  308. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  309. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
  310. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
  311. aiq/profiler/inference_optimization/data_models.py +386 -0
  312. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  313. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
  314. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
  315. aiq/profiler/inference_optimization/llm_metrics.py +212 -0
  316. aiq/profiler/inference_optimization/prompt_caching.py +163 -0
  317. aiq/profiler/inference_optimization/token_uniqueness.py +107 -0
  318. aiq/profiler/inference_optimization/workflow_runtimes.py +72 -0
  319. aiq/profiler/intermediate_property_adapter.py +102 -0
  320. aiq/profiler/profile_runner.py +473 -0
  321. aiq/profiler/utils.py +184 -0
  322. aiq/registry_handlers/__init__.py +0 -0
  323. aiq/registry_handlers/local/__init__.py +0 -0
  324. aiq/registry_handlers/local/local_handler.py +176 -0
  325. aiq/registry_handlers/local/register_local.py +37 -0
  326. aiq/registry_handlers/metadata_factory.py +60 -0
  327. aiq/registry_handlers/package_utils.py +567 -0
  328. aiq/registry_handlers/pypi/__init__.py +0 -0
  329. aiq/registry_handlers/pypi/pypi_handler.py +251 -0
  330. aiq/registry_handlers/pypi/register_pypi.py +40 -0
  331. aiq/registry_handlers/register.py +21 -0
  332. aiq/registry_handlers/registry_handler_base.py +157 -0
  333. aiq/registry_handlers/rest/__init__.py +0 -0
  334. aiq/registry_handlers/rest/register_rest.py +56 -0
  335. aiq/registry_handlers/rest/rest_handler.py +237 -0
  336. aiq/registry_handlers/schemas/__init__.py +0 -0
  337. aiq/registry_handlers/schemas/headers.py +42 -0
  338. aiq/registry_handlers/schemas/package.py +68 -0
  339. aiq/registry_handlers/schemas/publish.py +63 -0
  340. aiq/registry_handlers/schemas/pull.py +82 -0
  341. aiq/registry_handlers/schemas/remove.py +36 -0
  342. aiq/registry_handlers/schemas/search.py +91 -0
  343. aiq/registry_handlers/schemas/status.py +47 -0
  344. aiq/retriever/__init__.py +0 -0
  345. aiq/retriever/interface.py +37 -0
  346. aiq/retriever/milvus/__init__.py +14 -0
  347. aiq/retriever/milvus/register.py +81 -0
  348. aiq/retriever/milvus/retriever.py +228 -0
  349. aiq/retriever/models.py +74 -0
  350. aiq/retriever/nemo_retriever/__init__.py +14 -0
  351. aiq/retriever/nemo_retriever/register.py +60 -0
  352. aiq/retriever/nemo_retriever/retriever.py +190 -0
  353. aiq/retriever/register.py +22 -0
  354. aiq/runtime/__init__.py +14 -0
  355. aiq/runtime/loader.py +215 -0
  356. aiq/runtime/runner.py +190 -0
  357. aiq/runtime/session.py +158 -0
  358. aiq/runtime/user_metadata.py +130 -0
  359. aiq/settings/__init__.py +0 -0
  360. aiq/settings/global_settings.py +318 -0
  361. aiq/test/.namespace +1 -0
  362. aiq/tool/__init__.py +0 -0
  363. aiq/tool/chat_completion.py +74 -0
  364. aiq/tool/code_execution/README.md +151 -0
  365. aiq/tool/code_execution/__init__.py +0 -0
  366. aiq/tool/code_execution/code_sandbox.py +267 -0
  367. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  368. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
  369. aiq/tool/code_execution/local_sandbox/__init__.py +13 -0
  370. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
  371. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
  372. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
  373. aiq/tool/code_execution/register.py +74 -0
  374. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  375. aiq/tool/code_execution/utils.py +100 -0
  376. aiq/tool/datetime_tools.py +42 -0
  377. aiq/tool/document_search.py +141 -0
  378. aiq/tool/github_tools/__init__.py +0 -0
  379. aiq/tool/github_tools/create_github_commit.py +133 -0
  380. aiq/tool/github_tools/create_github_issue.py +87 -0
  381. aiq/tool/github_tools/create_github_pr.py +106 -0
  382. aiq/tool/github_tools/get_github_file.py +106 -0
  383. aiq/tool/github_tools/get_github_issue.py +166 -0
  384. aiq/tool/github_tools/get_github_pr.py +256 -0
  385. aiq/tool/github_tools/update_github_issue.py +100 -0
  386. aiq/tool/mcp/__init__.py +14 -0
  387. aiq/tool/mcp/exceptions.py +142 -0
  388. aiq/tool/mcp/mcp_client.py +255 -0
  389. aiq/tool/mcp/mcp_tool.py +96 -0
  390. aiq/tool/memory_tools/__init__.py +0 -0
  391. aiq/tool/memory_tools/add_memory_tool.py +79 -0
  392. aiq/tool/memory_tools/delete_memory_tool.py +67 -0
  393. aiq/tool/memory_tools/get_memory_tool.py +72 -0
  394. aiq/tool/nvidia_rag.py +95 -0
  395. aiq/tool/register.py +38 -0
  396. aiq/tool/retriever.py +89 -0
  397. aiq/tool/server_tools.py +66 -0
  398. aiq/utils/__init__.py +0 -0
  399. aiq/utils/data_models/__init__.py +0 -0
  400. aiq/utils/data_models/schema_validator.py +58 -0
  401. aiq/utils/debugging_utils.py +43 -0
  402. aiq/utils/dump_distro_mapping.py +32 -0
  403. aiq/utils/exception_handlers/__init__.py +0 -0
  404. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  405. aiq/utils/exception_handlers/mcp.py +211 -0
  406. aiq/utils/exception_handlers/schemas.py +114 -0
  407. aiq/utils/io/__init__.py +0 -0
  408. aiq/utils/io/model_processing.py +28 -0
  409. aiq/utils/io/yaml_tools.py +119 -0
  410. aiq/utils/log_utils.py +37 -0
  411. aiq/utils/metadata_utils.py +74 -0
  412. aiq/utils/optional_imports.py +142 -0
  413. aiq/utils/producer_consumer_queue.py +178 -0
  414. aiq/utils/reactive/__init__.py +0 -0
  415. aiq/utils/reactive/base/__init__.py +0 -0
  416. aiq/utils/reactive/base/observable_base.py +65 -0
  417. aiq/utils/reactive/base/observer_base.py +55 -0
  418. aiq/utils/reactive/base/subject_base.py +79 -0
  419. aiq/utils/reactive/observable.py +59 -0
  420. aiq/utils/reactive/observer.py +76 -0
  421. aiq/utils/reactive/subject.py +131 -0
  422. aiq/utils/reactive/subscription.py +49 -0
  423. aiq/utils/settings/__init__.py +0 -0
  424. aiq/utils/settings/global_settings.py +197 -0
  425. aiq/utils/string_utils.py +38 -0
  426. aiq/utils/type_converter.py +290 -0
  427. aiq/utils/type_utils.py +484 -0
  428. aiq/utils/url_utils.py +27 -0
  429. nvidia_nat-1.2.0rc5.dist-info/METADATA +363 -0
  430. nvidia_nat-1.2.0rc5.dist-info/RECORD +435 -0
  431. nvidia_nat-1.2.0rc5.dist-info/WHEEL +5 -0
  432. nvidia_nat-1.2.0rc5.dist-info/entry_points.txt +20 -0
  433. nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE-3rd-party.txt +3686 -0
  434. nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE.md +201 -0
  435. nvidia_nat-1.2.0rc5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,59 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import typing
17
+
18
+ from pydantic import AfterValidator
19
+ from pydantic import AliasChoices
20
+ from pydantic import ConfigDict
21
+ from pydantic import Field
22
+
23
+ from aiq.builder.builder import Builder
24
+ from aiq.builder.embedder import EmbedderProviderInfo
25
+ from aiq.cli.register_workflow import register_embedder_provider
26
+ from aiq.data_models.embedder import EmbedderBaseConfig
27
+ from aiq.data_models.retry_mixin import RetryMixin
28
+
29
# Truncation strategies accepted for the ``truncate`` option.
allowed_truncate_values = ["NONE", "START", "END"]


def option_in_allowed_values(v):
    """Return ``v`` unchanged if it is one of ``allowed_truncate_values``.

    Raises:
        ValueError: If ``v`` is not an allowed truncation option.
    """
    # An explicit raise is used instead of ``assert`` so the check still runs
    # when Python is started with ``-O`` (which strips assert statements).
    if v not in allowed_truncate_values:
        raise ValueError(f"Invalid truncate option {v!r}; expected one of {allowed_truncate_values}")
    return v
36
+
37
+
38
# A ``str`` that is additionally passed through ``option_in_allowed_values``
# after pydantic's own string validation.
TruncationOption = typing.Annotated[str, AfterValidator(option_in_allowed_values)]


class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
    """An NVIDIA Inference Microservice (NIM) embedder provider to be used with an embedder client."""

    api_key: str | None = Field(default=None, description="NVIDIA API key to interact with hosted NIM.")
    base_url: str | None = Field(default=None, description="Base url to the hosted NIM.")
    # Accepts either ``model_name`` or ``model`` on input; serialized as ``model``.
    model_name: str = Field(validation_alias=AliasChoices("model_name", "model"),
                            serialization_alias="model",
                            description="The model name for the hosted NIM.")
    truncate: TruncationOption = Field(default="NONE",
                                       description=("The truncation strategy applied on the "
                                                    "server side if the input is too large."))

    # Clears pydantic's protected namespaces; presumably so that the
    # ``model_name`` field does not trigger the ``model_`` prefix warning.
    model_config = ConfigDict(protected_namespaces=())
54
+
55
+
56
@register_embedder_provider(config_type=NIMEmbedderModelConfig)
async def nim_embedder_model(embedder_config: NIMEmbedderModelConfig, builder: Builder):
    """Yield the provider info that wraps a NIM embedder configuration.

    ``builder`` is part of the signature but is not used by this provider.
    """
    provider_info = EmbedderProviderInfo(config=embedder_config,
                                         description="A NIM model for use with an Embedder client.")
    yield provider_info
@@ -0,0 +1,43 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pydantic import AliasChoices
17
+ from pydantic import ConfigDict
18
+ from pydantic import Field
19
+
20
+ from aiq.builder.builder import Builder
21
+ from aiq.builder.embedder import EmbedderProviderInfo
22
+ from aiq.cli.register_workflow import register_embedder_provider
23
+ from aiq.data_models.embedder import EmbedderBaseConfig
24
+ from aiq.data_models.retry_mixin import RetryMixin
25
+
26
+
27
class OpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="openai"):
    """An OpenAI embedder provider to be used with an embedder client."""

    # Clears pydantic's protected namespaces; presumably so that the
    # ``model_name`` field does not trigger the ``model_`` prefix warning.
    model_config = ConfigDict(protected_namespaces=())

    api_key: str | None = Field(default=None, description="OpenAI API key to interact with hosted model.")
    base_url: str | None = Field(default=None, description="Base url to the hosted model.")
    # Accepts either ``model_name`` or ``model`` on input; serialized as ``model``.
    model_name: str = Field(validation_alias=AliasChoices("model_name", "model"),
                            serialization_alias="model",
                            description="The OpenAI hosted model name.")
    max_retries: int = Field(default=2, description="The max number of retries for the request.")
38
+
39
+
40
@register_embedder_provider(config_type=OpenAIEmbedderModelConfig)
async def openai_llm(config: OpenAIEmbedderModelConfig, builder: Builder):
    """Yield the provider info that wraps an OpenAI embedder configuration.

    Despite the ``_llm`` suffix in its name, this is registered as an embedder
    provider. ``builder`` is part of the signature but is not used here.
    """
    provider_info = EmbedderProviderInfo(config=config,
                                         description="An OpenAI model for use with an Embedder client.")
    yield provider_info
@@ -0,0 +1,24 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # pylint: disable=unused-import
17
+ # flake8: noqa
18
+ # isort:skip_file
19
+
20
+ # Import any providers which need to be automatically registered here
21
+ from . import nim_embedder
22
+ from . import openai_embedder
23
+ # Import any clients which need to be automatically registered here
24
+ from . import langchain_client
aiq/eval/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
aiq/eval/config.py ADDED
@@ -0,0 +1,60 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pathlib import Path
17
+
18
+ from pydantic import BaseModel
19
+
20
+ from aiq.eval.evaluator.evaluator_model import EvalInput
21
+ from aiq.eval.evaluator.evaluator_model import EvalOutput
22
+ from aiq.eval.usage_stats import UsageStats
23
+ from aiq.profiler.data_models import ProfilerResults
24
+
25
+
26
class EvaluationRunConfig(BaseModel):
    """
    Settings that control one evaluation run.

    Only ``config_file`` is required; every other field has a default.
    """
    config_file: Path
    # The dataset file path may instead be given in the config file.
    dataset: str | None = None
    result_json_path: str = "$"
    skip_workflow: bool = False
    skip_completed_entries: bool = False
    # Only used when the workflow is run remotely.
    endpoint: str | None = None
    endpoint_timeout: int = 300
    reps: int = 1
    override: tuple[tuple[str, str], ...] = ()
    # When false, nothing is written to the output directory. Useful when the
    # evaluation is driven by another tool.
    write_output: bool = True
    # When true, the dataset is adjusted to a multiple of the concurrency.
    adjust_dataset_size: bool = False
    # Number of passes at each concurrency; if 0 the dataset is adjusted to a
    # multiple of the concurrency. This is only used if adjust_dataset_size is true.
    num_passes: int = 0
47
+
48
+
49
class EvaluationRunOutput(BaseModel):
    """
    Results produced by one evaluation run.

    Every field except ``usage_stats`` must be supplied explicitly;
    ``workflow_output_file`` has no default even though it accepts ``None``.
    """
    workflow_output_file: Path | None
    evaluator_output_files: list[Path]
    workflow_interrupted: bool

    eval_input: EvalInput
    # Pairs of (name, output); presumably the name identifies the evaluator -- TODO confirm.
    evaluation_results: list[tuple[str, EvalOutput]]
    usage_stats: UsageStats | None = None
    profiler_results: ProfilerResults
File without changes
@@ -0,0 +1,106 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import logging
16
+ from pathlib import Path
17
+
18
+ import boto3
19
+ import requests
20
+ from botocore.exceptions import NoCredentialsError
21
+
22
+ from aiq.data_models.dataset_handler import EvalDatasetConfig
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class DatasetDownloader:
    """
    Download remote datasets using signed URLs or S3 credentials.

    One DatasetDownloader object is needed for each dataset to be downloaded.
    """

    def __init__(self, dataset_config: EvalDatasetConfig):
        """Store the dataset configuration; the S3 client is created on first use."""
        self.dataset_config = dataset_config
        self._s3_client = None

    @property
    def s3_config(self):
        """Return the `s3` section of the dataset configuration."""
        return self.dataset_config.s3

    @property
    def s3_client(self):
        """
        Lazy init the S3 client.

        The client is built from the endpoint URL, access key and secret key found in
        `s3_config` and cached for subsequent accesses. Any failure is logged and re-raised.
        """
        if self._s3_client is None:
            try:
                self._s3_client = boto3.client("s3",
                                               endpoint_url=self.s3_config.endpoint_url,
                                               aws_access_key_id=self.s3_config.access_key,
                                               aws_secret_access_key=self.s3_config.secret_key)
            except NoCredentialsError as e:
                logger.error("AWS credentials not available: %s", e)
                raise
            except Exception as e:
                logger.error("Failed to initialize S3 client: %s", e)
                raise
        return self._s3_client

    @staticmethod
    def ensure_directory_exists(file_path: str):
        """Ensure the directory for the file exists."""
        Path(file_path).parent.mkdir(parents=True, exist_ok=True)

    def download_with_signed_url(self, remote_file_path: str, local_file_path: str, timeout: int = 300):
        """
        Download a file using a signed URL.

        The body is streamed to `local_file_path` in 8 KiB chunks. Request errors are
        logged and re-raised.
        """
        try:
            # The context manager releases the streamed connection even if writing fails part-way.
            with requests.get(remote_file_path, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                with open(local_file_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)
            logger.info("File downloaded successfully to %s using signed URL.", local_file_path)
        except requests.exceptions.RequestException as e:
            logger.error("Error downloading file using signed URL: %s", e)
            raise

    def download_with_boto3(self, remote_file_path: str, local_file_path: str):
        """
        Download a file using boto3 and credentials.

        `remote_file_path` is used as the object key inside the configured bucket.
        Errors are logged and re-raised.
        """
        try:
            self.s3_client.download_file(self.dataset_config.s3.bucket, remote_file_path, local_file_path)
            logger.info("File downloaded successfully to %s using S3 client.", local_file_path)
        except Exception as e:
            logger.error("Error downloading file from S3: %s", e)
            raise

    @staticmethod
    def is_file_path_url(file_path: str) -> bool:
        """
        Check if the file path is an HTTP(S) URL.

        The full scheme prefix is required so that S3 object keys that merely begin
        with the letters "http" (e.g. "http_logs/data.json") are not mistaken for URLs.
        """
        return file_path.lower().startswith(("http://", "https://"))

    def download_file(self, remote_file_path: str, local_file_path: str, timeout: int = 120):
        """
        Download a file using the appropriate method.

        URLs are fetched as signed URLs (with `timeout` seconds); anything else is treated
        as an S3 object key. The parent directory of `local_file_path` is created if needed.
        """
        self.ensure_directory_exists(local_file_path)
        if self.is_file_path_url(remote_file_path):
            logger.info("Using signed URL to download the file %s...", remote_file_path)
            self.download_with_signed_url(remote_file_path, local_file_path, timeout=timeout)
        else:
            logger.info("Using S3 credentials to download the file %s...", remote_file_path)
            self.download_with_boto3(remote_file_path, local_file_path)

    def download_dataset(self):
        """
        Download the dataset defined in the evaluation configuration.

        Nothing is done when the configuration has no `remote_file_path`.
        """
        remote_file_path = self.dataset_config.remote_file_path
        if remote_file_path:
            logger.info("Downloading remote dataset %s", remote_file_path)
            self.download_file(remote_file_path=remote_file_path, local_file_path=self.dataset_config.file_path)
@@ -0,0 +1,52 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import pandas as pd
17
+
18
+ from aiq.data_models.dataset_handler import EvalFilterConfig
19
+
20
+
21
class DatasetFilter:
    """
    Filter a DataFrame with column-based allowlists and denylists.
    - With an allowlist, only rows whose column value is in the listed values are kept.
    - With a denylist, rows whose column value is in the listed values are removed.
    - A filter on a column that the DataFrame does not have is skipped.

    The class is dataset agnostic: it works on any DataFrame, driven only by the provided
    filter configuration.
    """

    def __init__(self, filter_config: EvalFilterConfig):
        self.filter_config = filter_config

    def apply_filters(self, df) -> pd.DataFrame:
        """Return a filtered copy of `df`; the allowlist is applied first, then the denylist."""
        result = df.copy()

        # Allowlist pass: keep matching rows only.
        allow = self.filter_config.allowlist
        if allow:
            for col, wanted in allow.field.items():
                if col not in result.columns:
                    continue
                keep_mask = result[col].isin(wanted)
                result = result[keep_mask]

        # Denylist pass: drop matching rows.
        deny = self.filter_config.denylist
        if deny:
            for col, unwanted in deny.field.items():
                if col not in result.columns:
                    continue
                drop_mask = result[col].isin(unwanted)
                result = result[~drop_mask]

        return result
@@ -0,0 +1,254 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import json
17
+ import math
18
+
19
+ import pandas as pd
20
+
21
+ from aiq.data_models.dataset_handler import EvalDatasetConfig
22
+ from aiq.data_models.dataset_handler import EvalDatasetJsonConfig
23
+ from aiq.data_models.intermediate_step import IntermediateStep
24
+ from aiq.data_models.intermediate_step import IntermediateStepType
25
+ from aiq.eval.dataset_handler.dataset_downloader import DatasetDownloader
26
+ from aiq.eval.dataset_handler.dataset_filter import DatasetFilter
27
+ from aiq.eval.evaluator.evaluator_model import EvalInput
28
+ from aiq.eval.evaluator.evaluator_model import EvalInputItem
29
+
30
+
31
class DatasetHandler:
    """
    Read the datasets and pre-process (apply filters, deduplicate etc.) before turning them into EvalInput objects.
    One DatasetHandler object is needed for each dataset to be evaluated.
    """

    def __init__(self,
                 dataset_config: EvalDatasetConfig,
                 reps: int,
                 concurrency: int,
                 num_passes: int | None = None,
                 adjust_dataset_size: bool = False):
        """
        Args:
            dataset_config: Dataset configuration (keys, structure, filter, parser).
            reps: Number of times each dataset row is repeated.
            concurrency: Concurrency the dataset size is aligned to when `adjust_dataset_size` is set.
            num_passes: Number of passes at the given concurrency; `None` is treated like 0.
            adjust_dataset_size: If True, the dataset is adjusted to a multiple of `concurrency`.
        """
        # Function-scope import kept from the original code
        # (presumably to avoid an import cycle - confirm before moving it to module level).
        from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

        self.dataset_config = dataset_config
        self.dataset_filter = DatasetFilter(dataset_config.filter)
        self.reps = reps

        # number of passes at specific concurrency
        self.concurrency = concurrency
        self.num_passes = num_passes
        self.adjust_dataset_size = adjust_dataset_size

        # Helpers
        self.intermediate_step_adapter = IntermediateStepAdapter()

    def is_structured_input(self) -> bool:
        '''Check if the input is structured or unstructured'''
        return not self.dataset_config.structure.disable

    @property
    def id_key(self) -> str:
        return self.dataset_config.id_key

    @property
    def question_key(self) -> str:
        return self.dataset_config.structure.question_key

    @property
    def answer_key(self) -> str:
        return self.dataset_config.structure.answer_key

    @property
    def generated_answer_key(self) -> str:
        return self.dataset_config.structure.generated_answer_key

    @property
    def trajectory_key(self) -> str:
        return self.dataset_config.structure.trajectory_key

    @property
    def expected_trajectory_key(self) -> str:
        return self.dataset_config.structure.expected_trajectory_key

    def get_eval_input_from_df(self, input_df: pd.DataFrame) -> EvalInput:
        """
        Convert the rows of `input_df` into an EvalInput.

        For structured input the configured question/answer/trajectory columns are mapped onto
        the item fields and rows with a missing or blank question are dropped. For unstructured
        input the whole row (as JSON) becomes the item input.
        """

        def create_eval_item(row: pd.Series, structured: bool) -> EvalInputItem:
            """Helper function to create EvalInputItem."""
            return EvalInputItem(
                id=row.get(self.id_key, ""),
                input_obj=row.to_json() if not structured else row.get(self.question_key, ""),
                expected_output_obj=row.get(self.answer_key, "") if structured else "",
                output_obj=row.get(self.generated_answer_key, "") if structured else "",
                trajectory=row.get(self.trajectory_key, []) if structured else [],
                expected_trajectory=row.get(self.expected_trajectory_key, []) if structured else [],
                full_dataset_entry=row.to_dict(),
            )

        # if input dataframe is empty return an empty list
        if input_df.empty:
            return EvalInput(eval_input_items=[])

        structured = self.is_structured_input()
        if structured:
            # For structured input, question is mandatory. Ignore rows with missing or empty questions.
            # Values are stringified before stripping so that a non-string question column
            # (e.g. numeric) does not break the `.str` accessor.
            questions = input_df[self.question_key]
            has_question = questions.notnull() & questions.astype(str).str.strip().ne("")
            input_df = input_df[has_question]
        eval_input_items = [create_eval_item(row, structured) for _, row in input_df.iterrows()]

        return EvalInput(eval_input_items=eval_input_items)

    def setup_reps(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """replicate the rows and update the id to id_key + "_rep" + rep_number"""
        id_col = self.dataset_config.id_key
        # Replicate the rows
        input_df = pd.concat([input_df] * self.reps, ignore_index=True)
        # Compute repetition index
        rep_index = input_df.groupby(id_col).cumcount().astype(str)
        # Convert id_key to string (id can be integer) if needed and update IDs
        input_df[id_col] = input_df[id_col].astype(str) + "_rep" + rep_index
        # Ensure unique ID values after modification
        return input_df.drop_duplicates(subset=[id_col])

    def adjust_dataset(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """
        Adjust the dataset so its length is a multiple of concurrency.

        If num_passes > 0:
            dataset size is adjusted to concurrency * num_passes
        else:
            dataset size is adjusted to the largest multiple of concurrency
            that is less than or equal to the current dataset size
            (or to exactly `concurrency` rows when the dataset is smaller than that).

        When more rows are needed than are available, the dataset is replicated and the ids
        of the extra copies get a "_rep<i>" suffix. The input DataFrame is not modified.

        Raises:
            ValueError: if concurrency <= 0, num_passes < 0, or the dataset is empty.
        """
        if self.concurrency <= 0:
            raise ValueError("Concurrency must be > 0")

        # The constructor default for num_passes is None; treat that as "not specified" (0).
        num_passes = self.num_passes or 0
        if num_passes < 0:
            raise ValueError("num_passes must be >= 0")

        original_size = input_df.shape[0]
        if original_size == 0:
            # Nothing to replicate; without this guard the copy count below divides by zero.
            raise ValueError("Input dataset too small for even one batch at given concurrency.")

        # Calculate target size
        if num_passes > 0:
            # When num_passes is specified, always use concurrency * num_passes
            # This respects the user's intent for exact number of passes
            target_size = self.concurrency * num_passes
        else:
            # Largest multiple of concurrency <= original_size, but at least one full batch.
            target_size = max(original_size // self.concurrency, 1) * self.concurrency

        # If we need more rows than we have, replicate the dataset
        if original_size < target_size:
            id_col = self.dataset_config.id_key

            # Work on a copy so the caller's DataFrame keeps its original ids.
            base_df = input_df.copy()
            # Clean existing _rep suffix if present
            base_df[id_col] = base_df[id_col].astype(str).str.replace(r"_rep\d+$", "", regex=True)

            # Calculate how many complete copies we need
            copies_needed = math.ceil(target_size / original_size)

            # The first copy keeps the plain ids; every further copy gets a "_rep<i>" suffix.
            replicated_dfs = [base_df]
            for i in range(1, copies_needed):
                df_copy = base_df.copy()
                df_copy[id_col] = df_copy[id_col].astype(str) + f"_rep{i}"
                replicated_dfs.append(df_copy)

            input_df = pd.concat(replicated_dfs, ignore_index=True)

        # Return exactly the target size
        return input_df.head(target_size)

    def get_eval_input_from_dataset(self, dataset: str) -> EvalInput:
        """
        Read the dataset and convert it to EvalInput.

        Steps: download the dataset if it is remote, parse it into a DataFrame, apply the
        configured filters, deduplicate on the id key, then either replicate rows (`reps`)
        or align the size to the concurrency (`adjust_dataset_size`).

        Raises:
            ValueError: if both `reps > 1` and `adjust_dataset_size` are requested.
        """
        # Validate the options first so an invalid combination fails before any download or parsing.
        if self.reps > 1 and self.adjust_dataset_size:
            raise ValueError("reps and adjust_dataset_size are mutually exclusive")

        # if a dataset file has been provided in the command line, use that
        dataset_config = EvalDatasetJsonConfig(file_path=dataset) if dataset else self.dataset_config

        # Download the dataset if it is remote
        downloader = DatasetDownloader(dataset_config=dataset_config)
        downloader.download_dataset()

        parser, kwargs = dataset_config.parser()
        # Parse the dataset into a DataFrame
        input_df = parser(dataset_config.file_path, **kwargs)

        # Apply filters and deduplicate
        input_df = self.dataset_filter.apply_filters(input_df)
        input_df = input_df.drop_duplicates(subset=[self.dataset_config.id_key])

        # If more than one repetition is needed, replicate the rows
        if self.reps > 1:
            input_df = self.setup_reps(input_df)
        elif self.adjust_dataset_size:
            input_df = self.adjust_dataset(input_df)

        # Convert the DataFrame to a list of EvalInput objects
        return self.get_eval_input_from_df(input_df)

    def filter_intermediate_steps(self,
                                  intermediate_steps: list[IntermediateStep],
                                  event_filter: list[IntermediateStepType] | None = None) -> list[dict]:
        """
        Filter out the intermediate steps that are not relevant for evaluation.
        The output is written with the intention of re-running the evaluation using the original config file.

        When `event_filter` is None the adapter's DEFAULT_EVENT_FILTER is used.
        """
        if event_filter is None:
            event_filter = self.intermediate_step_adapter.DEFAULT_EVENT_FILTER
        filtered_steps = self.intermediate_step_adapter.filter_intermediate_steps(intermediate_steps, event_filter)
        return self.intermediate_step_adapter.serialize_intermediate_steps(filtered_steps)

    def publish_eval_input(self,
                           eval_input,
                           workflow_output_step_filter: list[IntermediateStepType] | None = None) -> str:
        """
        Convert the EvalInput object to a JSON output for storing in a file. Use the original keys to
        allow re-running evaluation using the original config file and '--skip_workflow' option.
        """

        def parse_if_json_string(value):
            """Decode JSON strings and dump pydantic-like objects; return anything else unchanged."""
            if isinstance(value, str):
                try:
                    return json.loads(value)
                except json.JSONDecodeError:
                    return value
            if hasattr(value, "model_dump"):
                return value.model_dump()
            return value

        indent = 2
        if self.is_structured_input():
            # Extract structured data from EvalInputItems
            data = [{
                self.id_key: item.id,
                self.question_key: item.input_obj,
                self.answer_key: item.expected_output_obj,
                self.generated_answer_key: item.output_obj,
                self.trajectory_key: self.filter_intermediate_steps(item.trajectory, workflow_output_step_filter),
                self.expected_trajectory_key: self.filter_intermediate_steps(item.expected_trajectory),
            } for item in eval_input.eval_input_items]
        else:
            # Unstructured case: return only raw output objects as a JSON array
            data = [parse_if_json_string(item.output_obj) for item in eval_input.eval_input_items]

        # default=str stringifies any value json cannot serialize natively.
        return json.dumps(data, indent=indent, ensure_ascii=False, default=str)