nvidia-nat 1.2.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. aiq/agent/__init__.py +0 -0
  2. aiq/agent/base.py +239 -0
  3. aiq/agent/dual_node.py +67 -0
  4. aiq/agent/react_agent/__init__.py +0 -0
  5. aiq/agent/react_agent/agent.py +355 -0
  6. aiq/agent/react_agent/output_parser.py +104 -0
  7. aiq/agent/react_agent/prompt.py +41 -0
  8. aiq/agent/react_agent/register.py +149 -0
  9. aiq/agent/reasoning_agent/__init__.py +0 -0
  10. aiq/agent/reasoning_agent/reasoning_agent.py +225 -0
  11. aiq/agent/register.py +23 -0
  12. aiq/agent/rewoo_agent/__init__.py +0 -0
  13. aiq/agent/rewoo_agent/agent.py +411 -0
  14. aiq/agent/rewoo_agent/prompt.py +108 -0
  15. aiq/agent/rewoo_agent/register.py +158 -0
  16. aiq/agent/tool_calling_agent/__init__.py +0 -0
  17. aiq/agent/tool_calling_agent/agent.py +119 -0
  18. aiq/agent/tool_calling_agent/register.py +106 -0
  19. aiq/authentication/__init__.py +14 -0
  20. aiq/authentication/api_key/__init__.py +14 -0
  21. aiq/authentication/api_key/api_key_auth_provider.py +96 -0
  22. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  23. aiq/authentication/api_key/register.py +26 -0
  24. aiq/authentication/exceptions/__init__.py +14 -0
  25. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  26. aiq/authentication/http_basic_auth/__init__.py +0 -0
  27. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  28. aiq/authentication/http_basic_auth/register.py +30 -0
  29. aiq/authentication/interfaces.py +93 -0
  30. aiq/authentication/oauth2/__init__.py +14 -0
  31. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  32. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  33. aiq/authentication/oauth2/register.py +25 -0
  34. aiq/authentication/register.py +21 -0
  35. aiq/builder/__init__.py +0 -0
  36. aiq/builder/builder.py +285 -0
  37. aiq/builder/component_utils.py +316 -0
  38. aiq/builder/context.py +264 -0
  39. aiq/builder/embedder.py +24 -0
  40. aiq/builder/eval_builder.py +161 -0
  41. aiq/builder/evaluator.py +29 -0
  42. aiq/builder/framework_enum.py +24 -0
  43. aiq/builder/front_end.py +73 -0
  44. aiq/builder/function.py +344 -0
  45. aiq/builder/function_base.py +380 -0
  46. aiq/builder/function_info.py +627 -0
  47. aiq/builder/intermediate_step_manager.py +174 -0
  48. aiq/builder/llm.py +25 -0
  49. aiq/builder/retriever.py +25 -0
  50. aiq/builder/user_interaction_manager.py +74 -0
  51. aiq/builder/workflow.py +148 -0
  52. aiq/builder/workflow_builder.py +1117 -0
  53. aiq/cli/__init__.py +14 -0
  54. aiq/cli/cli_utils/__init__.py +0 -0
  55. aiq/cli/cli_utils/config_override.py +231 -0
  56. aiq/cli/cli_utils/validation.py +37 -0
  57. aiq/cli/commands/__init__.py +0 -0
  58. aiq/cli/commands/configure/__init__.py +0 -0
  59. aiq/cli/commands/configure/channel/__init__.py +0 -0
  60. aiq/cli/commands/configure/channel/add.py +28 -0
  61. aiq/cli/commands/configure/channel/channel.py +36 -0
  62. aiq/cli/commands/configure/channel/remove.py +30 -0
  63. aiq/cli/commands/configure/channel/update.py +30 -0
  64. aiq/cli/commands/configure/configure.py +33 -0
  65. aiq/cli/commands/evaluate.py +139 -0
  66. aiq/cli/commands/info/__init__.py +14 -0
  67. aiq/cli/commands/info/info.py +39 -0
  68. aiq/cli/commands/info/list_channels.py +32 -0
  69. aiq/cli/commands/info/list_components.py +129 -0
  70. aiq/cli/commands/info/list_mcp.py +213 -0
  71. aiq/cli/commands/registry/__init__.py +14 -0
  72. aiq/cli/commands/registry/publish.py +88 -0
  73. aiq/cli/commands/registry/pull.py +118 -0
  74. aiq/cli/commands/registry/registry.py +38 -0
  75. aiq/cli/commands/registry/remove.py +108 -0
  76. aiq/cli/commands/registry/search.py +155 -0
  77. aiq/cli/commands/sizing/__init__.py +14 -0
  78. aiq/cli/commands/sizing/calc.py +297 -0
  79. aiq/cli/commands/sizing/sizing.py +27 -0
  80. aiq/cli/commands/start.py +246 -0
  81. aiq/cli/commands/uninstall.py +81 -0
  82. aiq/cli/commands/validate.py +47 -0
  83. aiq/cli/commands/workflow/__init__.py +14 -0
  84. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  85. aiq/cli/commands/workflow/templates/config.yml.j2 +16 -0
  86. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
  87. aiq/cli/commands/workflow/templates/register.py.j2 +5 -0
  88. aiq/cli/commands/workflow/templates/workflow.py.j2 +36 -0
  89. aiq/cli/commands/workflow/workflow.py +37 -0
  90. aiq/cli/commands/workflow/workflow_commands.py +313 -0
  91. aiq/cli/entrypoint.py +135 -0
  92. aiq/cli/main.py +44 -0
  93. aiq/cli/register_workflow.py +488 -0
  94. aiq/cli/type_registry.py +1000 -0
  95. aiq/data_models/__init__.py +14 -0
  96. aiq/data_models/api_server.py +694 -0
  97. aiq/data_models/authentication.py +231 -0
  98. aiq/data_models/common.py +171 -0
  99. aiq/data_models/component.py +54 -0
  100. aiq/data_models/component_ref.py +168 -0
  101. aiq/data_models/config.py +406 -0
  102. aiq/data_models/dataset_handler.py +123 -0
  103. aiq/data_models/discovery_metadata.py +335 -0
  104. aiq/data_models/embedder.py +27 -0
  105. aiq/data_models/evaluate.py +127 -0
  106. aiq/data_models/evaluator.py +26 -0
  107. aiq/data_models/front_end.py +26 -0
  108. aiq/data_models/function.py +30 -0
  109. aiq/data_models/function_dependencies.py +72 -0
  110. aiq/data_models/interactive.py +246 -0
  111. aiq/data_models/intermediate_step.py +302 -0
  112. aiq/data_models/invocation_node.py +38 -0
  113. aiq/data_models/llm.py +27 -0
  114. aiq/data_models/logging.py +26 -0
  115. aiq/data_models/memory.py +27 -0
  116. aiq/data_models/object_store.py +44 -0
  117. aiq/data_models/profiler.py +54 -0
  118. aiq/data_models/registry_handler.py +26 -0
  119. aiq/data_models/retriever.py +30 -0
  120. aiq/data_models/retry_mixin.py +35 -0
  121. aiq/data_models/span.py +187 -0
  122. aiq/data_models/step_adaptor.py +64 -0
  123. aiq/data_models/streaming.py +33 -0
  124. aiq/data_models/swe_bench_model.py +54 -0
  125. aiq/data_models/telemetry_exporter.py +26 -0
  126. aiq/data_models/ttc_strategy.py +30 -0
  127. aiq/embedder/__init__.py +0 -0
  128. aiq/embedder/langchain_client.py +41 -0
  129. aiq/embedder/nim_embedder.py +59 -0
  130. aiq/embedder/openai_embedder.py +43 -0
  131. aiq/embedder/register.py +24 -0
  132. aiq/eval/__init__.py +14 -0
  133. aiq/eval/config.py +60 -0
  134. aiq/eval/dataset_handler/__init__.py +0 -0
  135. aiq/eval/dataset_handler/dataset_downloader.py +106 -0
  136. aiq/eval/dataset_handler/dataset_filter.py +52 -0
  137. aiq/eval/dataset_handler/dataset_handler.py +254 -0
  138. aiq/eval/evaluate.py +506 -0
  139. aiq/eval/evaluator/__init__.py +14 -0
  140. aiq/eval/evaluator/base_evaluator.py +73 -0
  141. aiq/eval/evaluator/evaluator_model.py +45 -0
  142. aiq/eval/intermediate_step_adapter.py +99 -0
  143. aiq/eval/rag_evaluator/__init__.py +0 -0
  144. aiq/eval/rag_evaluator/evaluate.py +178 -0
  145. aiq/eval/rag_evaluator/register.py +143 -0
  146. aiq/eval/register.py +23 -0
  147. aiq/eval/remote_workflow.py +133 -0
  148. aiq/eval/runners/__init__.py +14 -0
  149. aiq/eval/runners/config.py +39 -0
  150. aiq/eval/runners/multi_eval_runner.py +54 -0
  151. aiq/eval/runtime_event_subscriber.py +52 -0
  152. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  153. aiq/eval/swe_bench_evaluator/evaluate.py +215 -0
  154. aiq/eval/swe_bench_evaluator/register.py +36 -0
  155. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  156. aiq/eval/trajectory_evaluator/evaluate.py +75 -0
  157. aiq/eval/trajectory_evaluator/register.py +40 -0
  158. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  159. aiq/eval/tunable_rag_evaluator/evaluate.py +245 -0
  160. aiq/eval/tunable_rag_evaluator/register.py +52 -0
  161. aiq/eval/usage_stats.py +41 -0
  162. aiq/eval/utils/__init__.py +0 -0
  163. aiq/eval/utils/output_uploader.py +140 -0
  164. aiq/eval/utils/tqdm_position_registry.py +40 -0
  165. aiq/eval/utils/weave_eval.py +184 -0
  166. aiq/experimental/__init__.py +0 -0
  167. aiq/experimental/decorators/__init__.py +0 -0
  168. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  169. aiq/experimental/test_time_compute/__init__.py +0 -0
  170. aiq/experimental/test_time_compute/editing/__init__.py +0 -0
  171. aiq/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
  172. aiq/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
  173. aiq/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
  174. aiq/experimental/test_time_compute/functions/__init__.py +0 -0
  175. aiq/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
  176. aiq/experimental/test_time_compute/functions/its_tool_orchestration_function.py +205 -0
  177. aiq/experimental/test_time_compute/functions/its_tool_wrapper_function.py +146 -0
  178. aiq/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
  179. aiq/experimental/test_time_compute/models/__init__.py +0 -0
  180. aiq/experimental/test_time_compute/models/editor_config.py +132 -0
  181. aiq/experimental/test_time_compute/models/scoring_config.py +112 -0
  182. aiq/experimental/test_time_compute/models/search_config.py +120 -0
  183. aiq/experimental/test_time_compute/models/selection_config.py +154 -0
  184. aiq/experimental/test_time_compute/models/stage_enums.py +43 -0
  185. aiq/experimental/test_time_compute/models/strategy_base.py +66 -0
  186. aiq/experimental/test_time_compute/models/tool_use_config.py +41 -0
  187. aiq/experimental/test_time_compute/models/ttc_item.py +48 -0
  188. aiq/experimental/test_time_compute/register.py +36 -0
  189. aiq/experimental/test_time_compute/scoring/__init__.py +0 -0
  190. aiq/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
  191. aiq/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
  192. aiq/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
  193. aiq/experimental/test_time_compute/search/__init__.py +0 -0
  194. aiq/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
  195. aiq/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
  196. aiq/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
  197. aiq/experimental/test_time_compute/selection/__init__.py +0 -0
  198. aiq/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
  199. aiq/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
  200. aiq/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
  201. aiq/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
  202. aiq/experimental/test_time_compute/selection/threshold_selector.py +58 -0
  203. aiq/front_ends/__init__.py +14 -0
  204. aiq/front_ends/console/__init__.py +14 -0
  205. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  206. aiq/front_ends/console/console_front_end_config.py +32 -0
  207. aiq/front_ends/console/console_front_end_plugin.py +96 -0
  208. aiq/front_ends/console/register.py +25 -0
  209. aiq/front_ends/cron/__init__.py +14 -0
  210. aiq/front_ends/fastapi/__init__.py +14 -0
  211. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  212. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  213. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  214. aiq/front_ends/fastapi/fastapi_front_end_config.py +234 -0
  215. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  216. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
  217. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1092 -0
  218. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  219. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  220. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
  221. aiq/front_ends/fastapi/job_store.py +183 -0
  222. aiq/front_ends/fastapi/main.py +72 -0
  223. aiq/front_ends/fastapi/message_handler.py +298 -0
  224. aiq/front_ends/fastapi/message_validator.py +345 -0
  225. aiq/front_ends/fastapi/register.py +25 -0
  226. aiq/front_ends/fastapi/response_helpers.py +195 -0
  227. aiq/front_ends/fastapi/step_adaptor.py +321 -0
  228. aiq/front_ends/mcp/__init__.py +14 -0
  229. aiq/front_ends/mcp/mcp_front_end_config.py +32 -0
  230. aiq/front_ends/mcp/mcp_front_end_plugin.py +93 -0
  231. aiq/front_ends/mcp/register.py +27 -0
  232. aiq/front_ends/mcp/tool_converter.py +242 -0
  233. aiq/front_ends/register.py +22 -0
  234. aiq/front_ends/simple_base/__init__.py +14 -0
  235. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
  236. aiq/llm/__init__.py +0 -0
  237. aiq/llm/aws_bedrock_llm.py +57 -0
  238. aiq/llm/nim_llm.py +46 -0
  239. aiq/llm/openai_llm.py +46 -0
  240. aiq/llm/register.py +23 -0
  241. aiq/llm/utils/__init__.py +14 -0
  242. aiq/llm/utils/env_config_value.py +94 -0
  243. aiq/llm/utils/error.py +17 -0
  244. aiq/memory/__init__.py +20 -0
  245. aiq/memory/interfaces.py +183 -0
  246. aiq/memory/models.py +112 -0
  247. aiq/meta/module_to_distro.json +3 -0
  248. aiq/meta/pypi.md +58 -0
  249. aiq/object_store/__init__.py +20 -0
  250. aiq/object_store/in_memory_object_store.py +76 -0
  251. aiq/object_store/interfaces.py +84 -0
  252. aiq/object_store/models.py +36 -0
  253. aiq/object_store/register.py +20 -0
  254. aiq/observability/__init__.py +14 -0
  255. aiq/observability/exporter/__init__.py +14 -0
  256. aiq/observability/exporter/base_exporter.py +449 -0
  257. aiq/observability/exporter/exporter.py +78 -0
  258. aiq/observability/exporter/file_exporter.py +33 -0
  259. aiq/observability/exporter/processing_exporter.py +322 -0
  260. aiq/observability/exporter/raw_exporter.py +52 -0
  261. aiq/observability/exporter/span_exporter.py +265 -0
  262. aiq/observability/exporter_manager.py +335 -0
  263. aiq/observability/mixin/__init__.py +14 -0
  264. aiq/observability/mixin/batch_config_mixin.py +26 -0
  265. aiq/observability/mixin/collector_config_mixin.py +23 -0
  266. aiq/observability/mixin/file_mixin.py +288 -0
  267. aiq/observability/mixin/file_mode.py +23 -0
  268. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  269. aiq/observability/mixin/serialize_mixin.py +61 -0
  270. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  271. aiq/observability/processor/__init__.py +14 -0
  272. aiq/observability/processor/batching_processor.py +310 -0
  273. aiq/observability/processor/callback_processor.py +42 -0
  274. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  275. aiq/observability/processor/processor.py +71 -0
  276. aiq/observability/register.py +96 -0
  277. aiq/observability/utils/__init__.py +14 -0
  278. aiq/observability/utils/dict_utils.py +236 -0
  279. aiq/observability/utils/time_utils.py +31 -0
  280. aiq/plugins/.namespace +1 -0
  281. aiq/profiler/__init__.py +0 -0
  282. aiq/profiler/calc/__init__.py +14 -0
  283. aiq/profiler/calc/calc_runner.py +627 -0
  284. aiq/profiler/calc/calculations.py +288 -0
  285. aiq/profiler/calc/data_models.py +188 -0
  286. aiq/profiler/calc/plot.py +345 -0
  287. aiq/profiler/callbacks/__init__.py +0 -0
  288. aiq/profiler/callbacks/agno_callback_handler.py +295 -0
  289. aiq/profiler/callbacks/base_callback_class.py +20 -0
  290. aiq/profiler/callbacks/langchain_callback_handler.py +290 -0
  291. aiq/profiler/callbacks/llama_index_callback_handler.py +205 -0
  292. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
  293. aiq/profiler/callbacks/token_usage_base_model.py +27 -0
  294. aiq/profiler/data_frame_row.py +51 -0
  295. aiq/profiler/data_models.py +24 -0
  296. aiq/profiler/decorators/__init__.py +0 -0
  297. aiq/profiler/decorators/framework_wrapper.py +131 -0
  298. aiq/profiler/decorators/function_tracking.py +254 -0
  299. aiq/profiler/forecasting/__init__.py +0 -0
  300. aiq/profiler/forecasting/config.py +18 -0
  301. aiq/profiler/forecasting/model_trainer.py +75 -0
  302. aiq/profiler/forecasting/models/__init__.py +22 -0
  303. aiq/profiler/forecasting/models/forecasting_base_model.py +40 -0
  304. aiq/profiler/forecasting/models/linear_model.py +196 -0
  305. aiq/profiler/forecasting/models/random_forest_regressor.py +268 -0
  306. aiq/profiler/inference_metrics_model.py +28 -0
  307. aiq/profiler/inference_optimization/__init__.py +0 -0
  308. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  309. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
  310. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
  311. aiq/profiler/inference_optimization/data_models.py +386 -0
  312. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  313. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
  314. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
  315. aiq/profiler/inference_optimization/llm_metrics.py +212 -0
  316. aiq/profiler/inference_optimization/prompt_caching.py +163 -0
  317. aiq/profiler/inference_optimization/token_uniqueness.py +107 -0
  318. aiq/profiler/inference_optimization/workflow_runtimes.py +72 -0
  319. aiq/profiler/intermediate_property_adapter.py +102 -0
  320. aiq/profiler/profile_runner.py +473 -0
  321. aiq/profiler/utils.py +184 -0
  322. aiq/registry_handlers/__init__.py +0 -0
  323. aiq/registry_handlers/local/__init__.py +0 -0
  324. aiq/registry_handlers/local/local_handler.py +176 -0
  325. aiq/registry_handlers/local/register_local.py +37 -0
  326. aiq/registry_handlers/metadata_factory.py +60 -0
  327. aiq/registry_handlers/package_utils.py +567 -0
  328. aiq/registry_handlers/pypi/__init__.py +0 -0
  329. aiq/registry_handlers/pypi/pypi_handler.py +251 -0
  330. aiq/registry_handlers/pypi/register_pypi.py +40 -0
  331. aiq/registry_handlers/register.py +21 -0
  332. aiq/registry_handlers/registry_handler_base.py +157 -0
  333. aiq/registry_handlers/rest/__init__.py +0 -0
  334. aiq/registry_handlers/rest/register_rest.py +56 -0
  335. aiq/registry_handlers/rest/rest_handler.py +237 -0
  336. aiq/registry_handlers/schemas/__init__.py +0 -0
  337. aiq/registry_handlers/schemas/headers.py +42 -0
  338. aiq/registry_handlers/schemas/package.py +68 -0
  339. aiq/registry_handlers/schemas/publish.py +63 -0
  340. aiq/registry_handlers/schemas/pull.py +82 -0
  341. aiq/registry_handlers/schemas/remove.py +36 -0
  342. aiq/registry_handlers/schemas/search.py +91 -0
  343. aiq/registry_handlers/schemas/status.py +47 -0
  344. aiq/retriever/__init__.py +0 -0
  345. aiq/retriever/interface.py +37 -0
  346. aiq/retriever/milvus/__init__.py +14 -0
  347. aiq/retriever/milvus/register.py +81 -0
  348. aiq/retriever/milvus/retriever.py +228 -0
  349. aiq/retriever/models.py +74 -0
  350. aiq/retriever/nemo_retriever/__init__.py +14 -0
  351. aiq/retriever/nemo_retriever/register.py +60 -0
  352. aiq/retriever/nemo_retriever/retriever.py +190 -0
  353. aiq/retriever/register.py +22 -0
  354. aiq/runtime/__init__.py +14 -0
  355. aiq/runtime/loader.py +215 -0
  356. aiq/runtime/runner.py +190 -0
  357. aiq/runtime/session.py +158 -0
  358. aiq/runtime/user_metadata.py +130 -0
  359. aiq/settings/__init__.py +0 -0
  360. aiq/settings/global_settings.py +318 -0
  361. aiq/test/.namespace +1 -0
  362. aiq/tool/__init__.py +0 -0
  363. aiq/tool/chat_completion.py +74 -0
  364. aiq/tool/code_execution/README.md +151 -0
  365. aiq/tool/code_execution/__init__.py +0 -0
  366. aiq/tool/code_execution/code_sandbox.py +267 -0
  367. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  368. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
  369. aiq/tool/code_execution/local_sandbox/__init__.py +13 -0
  370. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
  371. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
  372. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
  373. aiq/tool/code_execution/register.py +74 -0
  374. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  375. aiq/tool/code_execution/utils.py +100 -0
  376. aiq/tool/datetime_tools.py +42 -0
  377. aiq/tool/document_search.py +141 -0
  378. aiq/tool/github_tools/__init__.py +0 -0
  379. aiq/tool/github_tools/create_github_commit.py +133 -0
  380. aiq/tool/github_tools/create_github_issue.py +87 -0
  381. aiq/tool/github_tools/create_github_pr.py +106 -0
  382. aiq/tool/github_tools/get_github_file.py +106 -0
  383. aiq/tool/github_tools/get_github_issue.py +166 -0
  384. aiq/tool/github_tools/get_github_pr.py +256 -0
  385. aiq/tool/github_tools/update_github_issue.py +100 -0
  386. aiq/tool/mcp/__init__.py +14 -0
  387. aiq/tool/mcp/exceptions.py +142 -0
  388. aiq/tool/mcp/mcp_client.py +255 -0
  389. aiq/tool/mcp/mcp_tool.py +96 -0
  390. aiq/tool/memory_tools/__init__.py +0 -0
  391. aiq/tool/memory_tools/add_memory_tool.py +79 -0
  392. aiq/tool/memory_tools/delete_memory_tool.py +67 -0
  393. aiq/tool/memory_tools/get_memory_tool.py +72 -0
  394. aiq/tool/nvidia_rag.py +95 -0
  395. aiq/tool/register.py +38 -0
  396. aiq/tool/retriever.py +89 -0
  397. aiq/tool/server_tools.py +66 -0
  398. aiq/utils/__init__.py +0 -0
  399. aiq/utils/data_models/__init__.py +0 -0
  400. aiq/utils/data_models/schema_validator.py +58 -0
  401. aiq/utils/debugging_utils.py +43 -0
  402. aiq/utils/dump_distro_mapping.py +32 -0
  403. aiq/utils/exception_handlers/__init__.py +0 -0
  404. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  405. aiq/utils/exception_handlers/mcp.py +211 -0
  406. aiq/utils/exception_handlers/schemas.py +114 -0
  407. aiq/utils/io/__init__.py +0 -0
  408. aiq/utils/io/model_processing.py +28 -0
  409. aiq/utils/io/yaml_tools.py +119 -0
  410. aiq/utils/log_utils.py +37 -0
  411. aiq/utils/metadata_utils.py +74 -0
  412. aiq/utils/optional_imports.py +142 -0
  413. aiq/utils/producer_consumer_queue.py +178 -0
  414. aiq/utils/reactive/__init__.py +0 -0
  415. aiq/utils/reactive/base/__init__.py +0 -0
  416. aiq/utils/reactive/base/observable_base.py +65 -0
  417. aiq/utils/reactive/base/observer_base.py +55 -0
  418. aiq/utils/reactive/base/subject_base.py +79 -0
  419. aiq/utils/reactive/observable.py +59 -0
  420. aiq/utils/reactive/observer.py +76 -0
  421. aiq/utils/reactive/subject.py +131 -0
  422. aiq/utils/reactive/subscription.py +49 -0
  423. aiq/utils/settings/__init__.py +0 -0
  424. aiq/utils/settings/global_settings.py +197 -0
  425. aiq/utils/string_utils.py +38 -0
  426. aiq/utils/type_converter.py +290 -0
  427. aiq/utils/type_utils.py +484 -0
  428. aiq/utils/url_utils.py +27 -0
  429. nvidia_nat-1.2.0rc5.dist-info/METADATA +363 -0
  430. nvidia_nat-1.2.0rc5.dist-info/RECORD +435 -0
  431. nvidia_nat-1.2.0rc5.dist-info/WHEEL +5 -0
  432. nvidia_nat-1.2.0rc5.dist-info/entry_points.txt +20 -0
  433. nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE-3rd-party.txt +3686 -0
  434. nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE.md +201 -0
  435. nvidia_nat-1.2.0rc5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,54 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import copy
17
+ import typing
18
+
19
+ from aiq.eval.config import EvaluationRunConfig
20
+ from aiq.eval.config import EvaluationRunOutput
21
+ from aiq.eval.evaluate import EvaluationRun
22
+ from aiq.eval.runners.config import MultiEvaluationRunConfig
23
+
24
+
25
class MultiEvaluationRunner:
    """
    Run several evaluations, one per entry of a multi-evaluation config.

    The evaluations are awaited one after another, and every result is stored in
    ``evaluation_run_outputs`` under the key of the config that produced it.
    """

    def __init__(self, config: MultiEvaluationRunConfig):
        """
        Initialize a multi-evaluation run.

        Args:
            config: Object whose ``configs`` attribute maps a caller-chosen key to
                the evaluation config that should be run under that key.
        """
        self.config = config
        # Populated by run_all(): config key -> output of that evaluation.
        self.evaluation_run_outputs: dict[typing.Any, EvaluationRunOutput] = {}

    async def run_all(self) -> dict[typing.Any, EvaluationRunOutput]:
        """
        Run all evaluations defined by ``self.config.configs``, in iteration order.

        Returns:
            The ``evaluation_run_outputs`` dict of this runner (the same object, not
            a copy), keyed like ``self.config.configs``.
        """
        # `run_id` rather than `id`, so the builtin is not shadowed inside the loop.
        for run_id, run_config in self.config.configs.items():
            output = await self.run_single_evaluation(run_id, run_config)
            self.evaluation_run_outputs[run_id] = output

        return self.evaluation_run_outputs

    async def run_single_evaluation(self, id: typing.Any, config: EvaluationRunConfig) -> EvaluationRunOutput:
        """
        Run a single evaluation and return the output.

        Args:
            id: Key of the evaluation. Not used by this implementation; the
                parameter name is kept as-is so existing keyword callers still work.
            config: Configuration of the evaluation to run. It is never mutated.
        """
        # copy the config in case the caller is using the same config for multiple evaluations
        config_copy = copy.deepcopy(config)
        evaluation_run = EvaluationRun(config_copy)
        return await evaluation_run.run_and_evaluate()
@@ -0,0 +1,52 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import asyncio
17
+ import logging
18
+
19
+ from aiq.builder.context import AIQContext
20
+ from aiq.data_models.intermediate_step import IntermediateStep
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
def pull_intermediate() -> asyncio.Future[list[dict]]:
    """
    Collect the intermediate steps published through the current AIQContext.

    Subscribes callbacks to the context's intermediate step manager. Every step
    received is dumped with ``model_dump()`` and appended to a list. When the
    stream completes, the returned future is resolved with that list; when the
    stream errors, the future is failed with the reported exception.

    Returns:
        A future that eventually holds the list of dumped intermediate steps.
    """
    # asyncio recommends creating futures through the loop's factory. Fall back to
    # direct construction when no loop is running, which is what this function
    # always did before.
    try:
        future = asyncio.get_running_loop().create_future()
    except RuntimeError:
        future = asyncio.Future()
    intermediate_steps: list[dict] = []  # We'll store the dumped steps here.
    context = AIQContext.get()

    def on_next_cb(item: IntermediateStep):
        # Append each new intermediate step (dumped to dict) to the list.
        intermediate_steps.append(item.model_dump())

    def on_error_cb(exc: Exception):
        logger.error("Hit on_error: %s", exc)
        # Guard against resolving twice (e.g. completion already reported).
        if not future.done():
            future.set_exception(exc)

    def on_complete_cb():
        logger.debug("Completed reading intermediate steps")
        # Guard against resolving twice (e.g. an error already reported).
        if not future.done():
            future.set_result(intermediate_steps)

    # Subscribe with our callbacks.
    context.intermediate_step_manager.subscribe(on_next=on_next_cb, on_error=on_error_cb, on_complete=on_complete_cb)

    return future
File without changes
@@ -0,0 +1,215 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import json
17
+ import logging
18
+ import os
19
+ import shutil
20
+ from pathlib import Path
21
+
22
+ from aiq.data_models.swe_bench_model import SWEBenchInput
23
+ from aiq.data_models.swe_bench_model import SWEBenchOutput
24
+ from aiq.eval.evaluator.evaluator_model import EvalInput
25
+ from aiq.eval.evaluator.evaluator_model import EvalOutput
26
+
27
+ try:
28
+ import swebench.harness.run_evaluation as swebench_eval
29
+ from swebench.harness.constants import MAP_REPO_VERSION_TO_SPECS
30
+ except ImportError as exc:
31
+ raise ImportError("Please install swebench to use this evaluator") from exc
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ class SweBenchEvaluator:
37
+
38
+ def __init__(self, run_id: str, max_workers: int, output_dir: Path):
39
+
40
+ self.run_id = run_id
41
+ self.max_workers = max_workers
42
+ self.output_dir = output_dir
43
+
44
+ # metadata
45
+ self._unsupported_repos = []
46
+ self._swe_bench_inputs = []
47
+ self._swe_bench_outputs = []
48
+ self._model_name_or_path = "no_llm"
49
+
50
+ def get_model_name_from_output(self, workflow_output: list[dict]) -> str | None:
51
+ """Fetch the `model_name_or_path` from the first entry in the list."""
52
+ return workflow_output[0].get("model_name_or_path") if workflow_output else None
53
+
54
+ @staticmethod
55
+ def empty_report_dir(report_dir: Path):
56
+ """Remove the current contents of the report directory."""
57
+ os.makedirs(report_dir, exist_ok=True)
58
+
59
+ # Iterate through all files in the directory and remove them
60
+ for item in report_dir.iterdir():
61
+ if item.is_file(): # Remove files only
62
+ item.unlink()
63
+ elif item.is_dir(): # Remove subdirectories and their contents
64
+ shutil.rmtree(item)
65
+
66
+ @staticmethod
67
+ def move_report_and_logs(swe_bench_report_file: str, logs_dir: str, report_dir: Path):
68
+ """ Temorary function to move the report and logs to the output directory"""
69
+ try:
70
+ shutil.move(swe_bench_report_file, report_dir)
71
+ except Exception as e:
72
+ logger.exception("Error moving report file: %s", e, exc_info=True)
73
+
74
+ try:
75
+ dest_logs_dir = os.path.join(report_dir, 'logs')
76
+ shutil.move(logs_dir, dest_logs_dir)
77
+ except Exception as e:
78
+ logger.exception("Error moving logs directory: %s", e, exc_info=True)
79
+
80
def is_repo_supported(self, repo: str, version: str) -> bool:
    """Check if the repo/version pair is supported by swebench.

    A pair counts as supported when `MAP_REPO_VERSION_TO_SPECS` has an entry for
    `repo` that in turn has an entry for `str(version)`. Every unsupported pair
    is recorded in `self._unsupported_repos`.

    Args:
        repo: Repository key to look up.
        version: Version to look up; converted to `str` for the lookup.

    Returns:
        True when both lookups succeed, False otherwise.
    """
    try:
        _ = MAP_REPO_VERSION_TO_SPECS[repo][str(version)]
    except KeyError:
        # Record an ordered (repo, version) pair. A set would lose which element
        # is which and would collapse to one element when both are equal.
        self._unsupported_repos.append((repo, version))
        return False
    return True
89
+
90
def process_eval_input(self, eval_input: EvalInput) -> tuple[Path | None, Path | None]:
    """Convert the eval items into SWE-bench models, filter them and write them to disk.

    Each item's ``input_obj`` is parsed as a ``SWEBenchInput`` and, when present, its
    ``output_obj`` as a ``SWEBenchOutput``. Items that fail to parse are logged and skipped.
    Inputs rejected by ``is_repo_supported`` are dropped, and outputs are kept only when
    their ``instance_id`` belongs to a remaining input.

    Args:
        eval_input: Container whose ``eval_input_items`` are processed.

    Returns:
        ``(workflow_input_file, workflow_output_file)`` on success, or ``(None, None)``
        when no supported input or no matching output remains.
    """
    swebench_inputs = []
    swebench_outputs = []

    for item in eval_input.eval_input_items:
        try:
            swebench_input = SWEBenchInput.model_validate_json(item.input_obj)
            # Versions are compared as strings further down, so normalise them here.
            swebench_input.version = str(swebench_input.version)
            swebench_inputs.append(swebench_input)

            if item.output_obj:
                swebench_output = SWEBenchOutput.model_validate_json(item.output_obj)
                swebench_outputs.append(swebench_output)
                # This is a bit of a hack to match the swe_bench harness: remember the
                # model name of the most recently parsed output.
                self._model_name_or_path = swebench_output.model_name_or_path

        except Exception as e:
            logger.exception("Failed to parse EvalInputItem %s: %s", item.id, e)

    # Partition in a single pass instead of re-scanning the supported list for every
    # input when building the warning below.
    supported_inputs = []
    skipped_repos = set()
    for swebench in swebench_inputs:
        if self.is_repo_supported(swebench.repo, swebench.version):
            supported_inputs.append(swebench)
        else:
            skipped_repos.add(swebench.repo)

    if not supported_inputs:
        logger.error("No supported instances; nothing to evaluate")
        return None, None

    if skipped_repos:
        logger.warning("The following repos are not supported by SWEBench and were skipped:\n %s", skipped_repos)

    # Write SWEBenchInput to file
    workflow_input_file = self.output_dir / "aiq_workflow_input.json"
    workflow_input_file.parent.mkdir(parents=True, exist_ok=True)
    workflow_input_file.write_text(json.dumps([swebench.model_dump() for swebench in supported_inputs], indent=2),
                                   encoding="utf-8")
    logger.info("Workflow input written to %s", workflow_input_file)

    # Keep only the outputs whose instance_id is present in the supported inputs
    valid_instance_ids = {swebench.instance_id for swebench in supported_inputs}
    filtered_outputs = [output for output in swebench_outputs if output.instance_id in valid_instance_ids]

    if not filtered_outputs:
        logger.error("No supported outputs; nothing to evaluate")
        return None, None

    # Write SWEBenchOutput to file
    workflow_output_file = self.output_dir / "aiq_workflow_output.json"
    workflow_output_file.write_text(json.dumps([output.model_dump() for output in filtered_outputs], indent=2),
                                    encoding="utf-8")
    logger.info("Workflow output written to %s", workflow_output_file)

    self._swe_bench_inputs = supported_inputs
    self._swe_bench_outputs = filtered_outputs
    return workflow_input_file, workflow_output_file
151
+
152
def build_eval_output(self):
    """Yield one placeholder result dict per entry in ``self._swe_bench_outputs``.

    Each dict carries the output's ``instance_id`` under ``"id"``; ``"score"`` and
    ``"reasoning"`` are the literal ``"-"`` for now.
    """
    # WIP: Build a score based on eval run logs
    yield from ({
        "id": output.instance_id, "score": "-", "reasoning": "-"
    } for output in self._swe_bench_outputs)
157
+
158
+ @staticmethod
159
+ def compute_score(success_cnt: int, total_cnt: int) -> float:
160
+ if total_cnt == 0:
161
+ return 0.0
162
+ score = success_cnt / total_cnt
163
+ return min(max(score, 0.0), 1.0)
164
+
165
async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
    """Run the swebench evaluation and store the report in the output directory.

    Args:
        eval_input: Items handed to ``process_eval_input`` to produce the harness files.

    Returns:
        An ``EvalOutput`` whose ``average_score`` is computed from the report's
        ``resolved_instances`` and ``total_instances`` (``0.0`` when there is nothing to
        evaluate or no report file is found) and whose items come from
        ``build_eval_output``.
    """
    # Process the EvalInput
    workflow_input_file, workflow_output_file = self.process_eval_input(eval_input)
    if not workflow_input_file or not workflow_output_file:
        # nothing to evaluate
        return EvalOutput(average_score=0.0, eval_output_items=[])

    report_dir = self.output_dir / "swe_bench_reports"
    self.empty_report_dir(report_dir)

    logger.info("Starting swe_bench run %s", self.run_id)
    swebench_eval.main(dataset_name=str(workflow_input_file),
                       split="dev",
                       instance_ids=[],
                       predictions_path=str(workflow_output_file),
                       max_workers=self.max_workers,
                       force_rebuild=False,
                       cache_level="env",
                       clean=False,
                       open_file_limit=4096,
                       run_id=self.run_id,
                       timeout=1800,
                       namespace=None,
                       rewrite_reports=False,
                       modal=False,
                       instance_image_tag='latest',
                       report_dir=str(report_dir))
    logger.info("Completed swe_bench run %s", self.run_id)

    # NOTE: the SWE-bench harness names its report after the model name with "/" replaced
    # by "__" (see its report-writing code); mirror that so names such as "org/model"
    # still resolve to the file the harness actually wrote.
    report_model_name = str(self._model_name_or_path).replace("/", "__")
    swe_bench_report_file = f"{report_model_name}.{self.run_id}.json"

    # There is a bug in swebench because of which report_dir is being ignored. Move the
    # report and logs to the output dir ourselves.
    self.move_report_and_logs(swe_bench_report_file=swe_bench_report_file, logs_dir="logs", report_dir=report_dir)
    logger.info("SWE_bench report and logs written to %s directory", report_dir)

    # Read the swe_bench report file; if it is not present the score stays at 0.0
    report_file = report_dir / swe_bench_report_file
    avg_score = 0.0
    if report_file.exists():
        with open(report_file, "r", encoding="utf-8") as f:
            report = json.load(f)
        resolved_instances = report.get("resolved_instances", 0)
        total_instances = report.get("total_instances", 0)
        avg_score = self.compute_score(resolved_instances, total_instances)

    # Build the EvalOutput from self._swe_bench_outputs and avg_score
    eval_output_items = list(self.build_eval_output())
    return EvalOutput(average_score=avg_score, eval_output_items=eval_output_items)
@@ -0,0 +1,36 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pydantic import Field
17
+
18
+ from aiq.builder.builder import EvalBuilder
19
+ from aiq.builder.evaluator import EvaluatorInfo
20
+ from aiq.cli.register_workflow import register_evaluator
21
+ from aiq.data_models.evaluator import EvaluatorBaseConfig
22
+
23
+
24
# Evaluator configuration registered under the name "swe_bench".
class SweBenchEvaluatorConfig(EvaluatorBaseConfig, name="swe_bench"):
    """Code patch evaluation for SWE Bench problems."""

    # Required field; forwarded to the evaluator by register_swe_bench_evaluator.
    run_id: str = Field(
        description="swe-bench test harness run identifier.",
    )
28
+
29
+
30
@register_evaluator(config_type=SweBenchEvaluatorConfig)
async def register_swe_bench_evaluator(config: SweBenchEvaluatorConfig, builder: EvalBuilder):
    """Build a ``SweBenchEvaluator`` from ``config`` and ``builder`` and yield its ``EvaluatorInfo``."""
    # Imported inside the function so the evaluator module is only loaded when this runs.
    from .evaluate import SweBenchEvaluator

    run_id = config.run_id
    max_concurrency = builder.get_max_concurrency()
    output_dir = builder.get_output_dir()
    swe_bench_evaluator = SweBenchEvaluator(run_id, max_concurrency, output_dir)

    yield EvaluatorInfo(
        config=config,
        evaluate_fn=swe_bench_evaluator.evaluate,
        description="SWE Bench Evaluator",
    )
File without changes
@@ -0,0 +1,75 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+
18
+ from langchain.evaluation import TrajectoryEvalChain
19
+ from langchain_core.language_models import BaseChatModel
20
+ from langchain_core.tools import BaseTool
21
+
22
+ from aiq.eval.evaluator.base_evaluator import BaseEvaluator
23
+ from aiq.eval.evaluator.evaluator_model import EvalInputItem
24
+ from aiq.eval.evaluator.evaluator_model import EvalOutputItem
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class TrajectoryEvaluator(BaseEvaluator):
    """Scores an agent's trajectory with LangChain's ``TrajectoryEvalChain``, using an LLM as judge."""

    def __init__(
        self,
        llm: BaseChatModel,
        tools: list[BaseTool] | None = None,
        max_concurrency: int = 8,
    ):
        """Store the judge LLM and tools and build the trajectory evaluation chain.

        Args:
            llm: Chat model used as the judge.
            tools: Tools handed to the evaluation chain; may be ``None``.
            max_concurrency: Forwarded to ``BaseEvaluator``.
        """
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Trajectory")
        self.llm = llm
        self.tools = tools
        # Initialize trajectory evaluation chain.
        # The tool parameter of TrajectoryEvalChain.from_llm is named `agent_tools`
        # (see the LangChain API reference); `tools=` is not one of its parameters.
        self.traj_eval_chain = TrajectoryEvalChain.from_llm(llm=self.llm,
                                                            agent_tools=self.tools,
                                                            return_reasoning=True,
                                                            requires_reference=True)
        logger.debug("Trajectory evaluation chain initialized.")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
        """Evaluate a single EvalInputItem and return an EvalOutputItem.

        The item's trajectory is reduced to its ``LLM_END`` and ``TOOL_END`` steps before
        being passed to the chain. If the chain raises, the error is logged and an item
        with score ``0.0`` and the error text as reasoning is returned instead.
        """
        from aiq.data_models.intermediate_step import IntermediateStepType
        from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

        intermediate_step_adapter = IntermediateStepAdapter()
        event_filter = [IntermediateStepType.LLM_END, IntermediateStepType.TOOL_END]

        question = item.input_obj
        generated_answer = item.output_obj
        agent_trajectory = intermediate_step_adapter.get_agent_actions(item.trajectory, event_filter)

        try:
            eval_result = await self.traj_eval_chain.aevaluate_agent_trajectory(
                input=question,
                agent_trajectory=agent_trajectory,
                prediction=generated_answer,
            )
        except Exception as e:
            # logger.exception already attaches the traceback; exc_info=True is not needed.
            logger.exception("Error evaluating trajectory for question: %s, Error: %s", question, e)
            return EvalOutputItem(id=item.id, score=0.0, reasoning=f"Error evaluating trajectory: {e}")

        # Return the judge's reasoning together with the trajectory that was evaluated.
        reasoning = {
            "reasoning": eval_result["reasoning"],
            "trajectory": [(action.model_dump(), output) for (action, output) in agent_trajectory]
        }
        return EvalOutputItem(id=item.id, score=eval_result["score"], reasoning=reasoning)
@@ -0,0 +1,40 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pydantic import Field
17
+
18
+ from aiq.builder.builder import EvalBuilder
19
+ from aiq.builder.evaluator import EvaluatorInfo
20
+ from aiq.cli.register_workflow import register_evaluator
21
+ from aiq.data_models.evaluator import EvaluatorBaseConfig
22
+
23
+
24
# Evaluator configuration registered under the name "trajectory".
class TrajectoryEvaluatorConfig(EvaluatorBaseConfig, name="trajectory"):
    """Agent Trajectory Evaluation."""

    # Required field; register_trajectory_evaluator passes it to builder.get_llm().
    llm_name: str = Field(
        description="LLM as a judge.",
    )
28
+
29
+
30
@register_evaluator(config_type=TrajectoryEvaluatorConfig)
async def register_trajectory_evaluator(config: TrajectoryEvaluatorConfig, builder: EvalBuilder):
    """Build a ``TrajectoryEvaluator`` from the configured LLM and the builder's tools and yield its info."""
    # Imported inside the function so these modules are only loaded when this runs.
    from aiq.builder.framework_enum import LLMFrameworkEnum

    from .evaluate import TrajectoryEvaluator

    framework = LLMFrameworkEnum.LANGCHAIN
    judge_llm = await builder.get_llm(config.llm_name, wrapper_type=framework)
    langchain_tools = builder.get_all_tools(wrapper_type=framework)

    trajectory_evaluator = TrajectoryEvaluator(judge_llm, langchain_tools, builder.get_max_concurrency())

    yield EvaluatorInfo(
        config=config,
        evaluate_fn=trajectory_evaluator.evaluate,
        description="Trajectory Evaluator",
    )
File without changes