nvidia-nat 1.2.0rc5__py3-none-any.whl → 1.2.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (441) hide show
  1. {aiq → nat}/agent/react_agent/agent.py +12 -12
  2. {aiq → nat}/agent/react_agent/register.py +20 -20
  3. {aiq → nat}/agent/reasoning_agent/reasoning_agent.py +14 -14
  4. {aiq → nat}/agent/rewoo_agent/agent.py +7 -7
  5. {aiq → nat}/agent/rewoo_agent/prompt.py +11 -12
  6. {aiq → nat}/agent/rewoo_agent/register.py +47 -49
  7. {aiq → nat}/agent/tool_calling_agent/agent.py +4 -4
  8. {aiq → nat}/agent/tool_calling_agent/register.py +8 -8
  9. {aiq → nat}/authentication/api_key/api_key_auth_provider.py +6 -6
  10. {aiq → nat}/authentication/api_key/api_key_auth_provider_config.py +5 -5
  11. {aiq → nat}/authentication/api_key/register.py +4 -4
  12. {aiq → nat}/authentication/http_basic_auth/http_basic_auth_provider.py +10 -10
  13. {aiq → nat}/authentication/http_basic_auth/register.py +4 -4
  14. {aiq → nat}/authentication/interfaces.py +6 -6
  15. {aiq → nat}/authentication/oauth2/oauth2_auth_code_flow_provider.py +11 -11
  16. {aiq → nat}/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
  17. {aiq → nat}/authentication/oauth2/register.py +4 -4
  18. {aiq → nat}/authentication/register.py +3 -3
  19. {aiq → nat}/builder/builder.py +30 -30
  20. {aiq → nat}/builder/component_utils.py +23 -23
  21. {aiq → nat}/builder/context.py +35 -29
  22. {aiq → nat}/builder/embedder.py +1 -1
  23. {aiq → nat}/builder/eval_builder.py +13 -13
  24. {aiq → nat}/builder/evaluator.py +3 -3
  25. {aiq → nat}/builder/front_end.py +11 -11
  26. {aiq → nat}/builder/function.py +8 -8
  27. {aiq → nat}/builder/function_base.py +6 -6
  28. {aiq → nat}/builder/function_info.py +3 -3
  29. {aiq → nat}/builder/intermediate_step_manager.py +13 -13
  30. {aiq → nat}/builder/llm.py +1 -1
  31. {aiq → nat}/builder/retriever.py +1 -1
  32. {aiq → nat}/builder/user_interaction_manager.py +14 -10
  33. {aiq → nat}/builder/workflow.py +25 -25
  34. {aiq → nat}/builder/workflow_builder.py +86 -86
  35. {aiq → nat}/cli/cli_utils/config_override.py +2 -2
  36. {aiq → nat}/cli/cli_utils/validation.py +4 -4
  37. {aiq → nat}/cli/commands/configure/channel/add.py +2 -2
  38. {aiq → nat}/cli/commands/configure/channel/channel.py +4 -6
  39. {aiq → nat}/cli/commands/configure/channel/remove.py +2 -2
  40. {aiq → nat}/cli/commands/configure/channel/update.py +2 -2
  41. {aiq → nat}/cli/commands/configure/configure.py +3 -3
  42. {aiq → nat}/cli/commands/evaluate.py +3 -3
  43. {aiq → nat}/cli/commands/info/info.py +5 -7
  44. {aiq → nat}/cli/commands/info/list_channels.py +1 -1
  45. {aiq → nat}/cli/commands/info/list_components.py +14 -14
  46. {aiq → nat}/cli/commands/info/list_mcp.py +106 -15
  47. {aiq → nat}/cli/commands/registry/publish.py +9 -9
  48. {aiq → nat}/cli/commands/registry/pull.py +10 -10
  49. {aiq → nat}/cli/commands/registry/registry.py +5 -7
  50. {aiq → nat}/cli/commands/registry/remove.py +8 -8
  51. {aiq → nat}/cli/commands/registry/search.py +15 -15
  52. {aiq → nat}/cli/commands/sizing/calc.py +3 -3
  53. {aiq → nat}/cli/commands/start.py +15 -15
  54. {aiq → nat}/cli/commands/uninstall.py +5 -5
  55. {aiq → nat}/cli/commands/validate.py +1 -1
  56. {aiq → nat}/cli/commands/workflow/templates/pyproject.toml.j2 +4 -4
  57. {aiq → nat}/cli/commands/workflow/templates/workflow.py.j2 +4 -4
  58. {aiq → nat}/cli/commands/workflow/workflow.py +3 -3
  59. {aiq → nat}/cli/commands/workflow/workflow_commands.py +15 -11
  60. {aiq → nat}/cli/entrypoint.py +6 -6
  61. {aiq → nat}/cli/main.py +15 -2
  62. {aiq → nat}/cli/register_workflow.py +70 -70
  63. {aiq → nat}/cli/type_registry.py +82 -82
  64. {aiq → nat}/data_models/api_server.py +121 -99
  65. {aiq → nat}/data_models/authentication.py +2 -2
  66. {aiq → nat}/data_models/component.py +5 -1
  67. {aiq → nat}/data_models/component_ref.py +12 -12
  68. {aiq → nat}/data_models/config.py +17 -13
  69. {aiq → nat}/data_models/dataset_handler.py +58 -12
  70. {aiq → nat}/data_models/discovery_metadata.py +36 -66
  71. {aiq → nat}/data_models/evaluate.py +9 -9
  72. {aiq → nat}/data_models/intermediate_step.py +7 -7
  73. {aiq → nat}/data_models/retriever.py +2 -2
  74. {aiq → nat}/data_models/span.py +10 -7
  75. {aiq → nat}/data_models/step_adaptor.py +1 -1
  76. {aiq → nat}/data_models/telemetry_exporter.py +2 -2
  77. {aiq → nat}/embedder/nim_embedder.py +5 -5
  78. {aiq → nat}/embedder/openai_embedder.py +5 -5
  79. {aiq/retriever → nat/embedder}/register.py +2 -2
  80. {aiq → nat}/eval/config.py +4 -4
  81. {aiq → nat}/eval/dataset_handler/dataset_downloader.py +1 -1
  82. {aiq → nat}/eval/dataset_handler/dataset_filter.py +1 -1
  83. {aiq → nat}/eval/dataset_handler/dataset_handler.py +127 -14
  84. {aiq → nat}/eval/evaluate.py +38 -34
  85. {aiq → nat}/eval/evaluator/base_evaluator.py +9 -5
  86. {aiq → nat}/eval/evaluator/evaluator_model.py +4 -4
  87. {aiq → nat}/eval/intermediate_step_adapter.py +2 -2
  88. {aiq → nat}/eval/rag_evaluator/evaluate.py +8 -8
  89. {aiq → nat}/eval/rag_evaluator/register.py +7 -7
  90. {aiq → nat}/eval/remote_workflow.py +8 -8
  91. {aiq → nat}/eval/runners/config.py +2 -2
  92. {aiq → nat}/eval/runners/multi_eval_runner.py +4 -4
  93. {aiq → nat}/eval/runtime_event_subscriber.py +3 -3
  94. {aiq → nat}/eval/swe_bench_evaluator/evaluate.py +6 -6
  95. {aiq → nat}/eval/swe_bench_evaluator/register.py +4 -4
  96. {aiq → nat}/eval/trajectory_evaluator/evaluate.py +5 -5
  97. {aiq → nat}/eval/trajectory_evaluator/register.py +5 -5
  98. {aiq → nat}/eval/tunable_rag_evaluator/evaluate.py +3 -3
  99. {aiq → nat}/eval/tunable_rag_evaluator/register.py +6 -6
  100. {aiq → nat}/eval/utils/output_uploader.py +1 -1
  101. {aiq → nat}/eval/utils/weave_eval.py +6 -6
  102. {aiq → nat}/experimental/decorators/experimental_warning_decorator.py +6 -2
  103. {aiq → nat}/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +10 -10
  104. {aiq → nat}/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +10 -10
  105. {aiq → nat}/experimental/test_time_compute/editing/motivation_aware_summarization.py +10 -10
  106. {aiq → nat}/experimental/test_time_compute/functions/execute_score_select_function.py +10 -10
  107. {aiq → nat}/experimental/test_time_compute/functions/plan_select_execute_function.py +17 -17
  108. aiq/experimental/test_time_compute/functions/its_tool_orchestration_function.py → nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +12 -12
  109. aiq/experimental/test_time_compute/functions/its_tool_wrapper_function.py → nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +10 -10
  110. {aiq → nat}/experimental/test_time_compute/models/editor_config.py +2 -2
  111. {aiq → nat}/experimental/test_time_compute/models/scoring_config.py +2 -2
  112. {aiq → nat}/experimental/test_time_compute/models/search_config.py +2 -2
  113. {aiq → nat}/experimental/test_time_compute/models/selection_config.py +2 -2
  114. {aiq → nat}/experimental/test_time_compute/models/strategy_base.py +4 -4
  115. {aiq → nat}/experimental/test_time_compute/register.py +2 -2
  116. {aiq → nat}/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +11 -11
  117. {aiq → nat}/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +11 -11
  118. {aiq → nat}/experimental/test_time_compute/scoring/motivation_aware_scorer.py +10 -10
  119. {aiq → nat}/experimental/test_time_compute/search/multi_llm_planner.py +10 -10
  120. {aiq → nat}/experimental/test_time_compute/search/multi_query_retrieval_search.py +10 -10
  121. {aiq → nat}/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +11 -11
  122. {aiq → nat}/experimental/test_time_compute/selection/best_of_n_selector.py +7 -7
  123. {aiq → nat}/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +11 -11
  124. {aiq → nat}/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +11 -11
  125. {aiq → nat}/experimental/test_time_compute/selection/llm_based_plan_selector.py +11 -11
  126. {aiq → nat}/experimental/test_time_compute/selection/threshold_selector.py +7 -7
  127. {aiq → nat}/front_ends/console/authentication_flow_handler.py +6 -6
  128. {aiq → nat}/front_ends/console/console_front_end_config.py +2 -2
  129. {aiq → nat}/front_ends/console/console_front_end_plugin.py +9 -9
  130. {aiq → nat}/front_ends/console/register.py +5 -5
  131. {aiq → nat}/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +4 -4
  132. {aiq → nat}/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +6 -6
  133. {aiq → nat}/front_ends/fastapi/fastapi_front_end_config.py +22 -15
  134. {aiq → nat}/front_ends/fastapi/fastapi_front_end_plugin.py +10 -10
  135. {aiq → nat}/front_ends/fastapi/fastapi_front_end_plugin_worker.py +110 -115
  136. {aiq → nat}/front_ends/fastapi/intermediate_steps_subscriber.py +10 -10
  137. {aiq → nat}/front_ends/fastapi/main.py +8 -8
  138. {aiq → nat}/front_ends/fastapi/message_handler.py +58 -36
  139. {aiq → nat}/front_ends/fastapi/message_validator.py +55 -48
  140. {aiq → nat}/front_ends/fastapi/register.py +5 -5
  141. {aiq → nat}/front_ends/fastapi/response_helpers.py +26 -26
  142. {aiq → nat}/front_ends/fastapi/step_adaptor.py +35 -37
  143. {aiq → nat}/front_ends/mcp/mcp_front_end_config.py +12 -8
  144. nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
  145. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
  146. {aiq → nat}/front_ends/mcp/register.py +5 -5
  147. {aiq → nat}/front_ends/mcp/tool_converter.py +20 -21
  148. {aiq → nat}/front_ends/simple_base/simple_front_end_plugin_base.py +6 -6
  149. {aiq → nat}/llm/aws_bedrock_llm.py +5 -5
  150. {aiq → nat}/llm/nim_llm.py +5 -5
  151. {aiq → nat}/llm/openai_llm.py +5 -5
  152. {aiq → nat}/memory/__init__.py +2 -2
  153. nat/meta/pypi.md +58 -0
  154. {aiq → nat}/object_store/__init__.py +2 -2
  155. {aiq → nat}/object_store/in_memory_object_store.py +6 -6
  156. {aiq → nat}/observability/exporter/base_exporter.py +9 -9
  157. {aiq → nat}/observability/exporter/exporter.py +1 -1
  158. {aiq → nat}/observability/exporter/file_exporter.py +6 -6
  159. {aiq → nat}/observability/exporter/processing_exporter.py +9 -9
  160. {aiq → nat}/observability/exporter/raw_exporter.py +4 -4
  161. {aiq → nat}/observability/exporter/span_exporter.py +57 -34
  162. {aiq → nat}/observability/exporter_manager.py +6 -6
  163. {aiq → nat}/observability/mixin/file_mixin.py +2 -2
  164. {aiq → nat}/observability/processor/batching_processor.py +1 -1
  165. {aiq → nat}/observability/processor/callback_processor.py +1 -1
  166. {aiq → nat}/observability/processor/intermediate_step_serializer.py +4 -4
  167. {aiq → nat}/observability/processor/processor.py +1 -1
  168. {aiq → nat}/observability/register.py +7 -7
  169. {aiq → nat}/profiler/calc/calc_runner.py +18 -18
  170. {aiq → nat}/profiler/calc/calculations.py +3 -3
  171. {aiq → nat}/profiler/calc/plot.py +2 -2
  172. {aiq → nat}/profiler/callbacks/agno_callback_handler.py +14 -14
  173. {aiq → nat}/profiler/callbacks/langchain_callback_handler.py +11 -11
  174. {aiq → nat}/profiler/callbacks/llama_index_callback_handler.py +12 -12
  175. {aiq → nat}/profiler/callbacks/semantic_kernel_callback_handler.py +11 -11
  176. {aiq → nat}/profiler/data_models.py +2 -2
  177. {aiq → nat}/profiler/decorators/framework_wrapper.py +6 -6
  178. {aiq → nat}/profiler/decorators/function_tracking.py +10 -10
  179. {aiq → nat}/profiler/forecasting/model_trainer.py +5 -5
  180. {aiq → nat}/profiler/forecasting/models/linear_model.py +5 -4
  181. {aiq → nat}/profiler/forecasting/models/random_forest_regressor.py +5 -4
  182. {aiq → nat}/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +7 -7
  183. {aiq → nat}/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +4 -4
  184. {aiq → nat}/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +6 -6
  185. {aiq → nat}/profiler/inference_optimization/experimental/prefix_span_analysis.py +6 -6
  186. {aiq → nat}/profiler/inference_optimization/llm_metrics.py +2 -2
  187. {aiq → nat}/profiler/inference_optimization/prompt_caching.py +5 -5
  188. {aiq → nat}/profiler/inference_optimization/token_uniqueness.py +4 -4
  189. {aiq → nat}/profiler/inference_optimization/workflow_runtimes.py +3 -3
  190. {aiq → nat}/profiler/intermediate_property_adapter.py +3 -3
  191. {aiq → nat}/profiler/profile_runner.py +17 -17
  192. {aiq → nat}/profiler/utils.py +4 -4
  193. {aiq → nat}/registry_handlers/local/local_handler.py +19 -19
  194. {aiq → nat}/registry_handlers/local/register_local.py +4 -4
  195. {aiq → nat}/registry_handlers/metadata_factory.py +7 -7
  196. {aiq → nat}/registry_handlers/package_utils.py +37 -33
  197. {aiq → nat}/registry_handlers/pypi/pypi_handler.py +21 -21
  198. {aiq → nat}/registry_handlers/pypi/register_pypi.py +6 -6
  199. {aiq → nat}/registry_handlers/registry_handler_base.py +21 -21
  200. {aiq → nat}/registry_handlers/rest/register_rest.py +7 -7
  201. {aiq → nat}/registry_handlers/rest/rest_handler.py +19 -19
  202. {aiq → nat}/registry_handlers/schemas/package.py +3 -3
  203. {aiq → nat}/registry_handlers/schemas/publish.py +17 -12
  204. {aiq → nat}/registry_handlers/schemas/pull.py +6 -6
  205. {aiq → nat}/registry_handlers/schemas/remove.py +2 -2
  206. {aiq → nat}/registry_handlers/schemas/search.py +11 -11
  207. {aiq → nat}/retriever/interface.py +6 -2
  208. {aiq → nat}/retriever/milvus/register.py +7 -7
  209. {aiq → nat}/retriever/milvus/retriever.py +8 -8
  210. {aiq → nat}/retriever/models.py +10 -7
  211. {aiq → nat}/retriever/nemo_retriever/register.py +6 -6
  212. {aiq → nat}/retriever/nemo_retriever/retriever.py +10 -10
  213. {aiq/embedder → nat/retriever}/register.py +2 -4
  214. {aiq → nat}/runtime/loader.py +38 -33
  215. {aiq → nat}/runtime/runner.py +30 -25
  216. {aiq → nat}/runtime/session.py +19 -15
  217. {aiq → nat}/runtime/user_metadata.py +1 -1
  218. {aiq → nat}/settings/global_settings.py +11 -11
  219. {aiq → nat}/tool/chat_completion.py +6 -6
  220. {aiq → nat}/tool/code_execution/README.md +2 -2
  221. {aiq → nat}/tool/code_execution/code_sandbox.py +1 -1
  222. {aiq → nat}/tool/code_execution/register.py +5 -5
  223. {aiq → nat}/tool/code_execution/test_code_execution_sandbox.py +1 -1
  224. {aiq → nat}/tool/datetime_tools.py +4 -4
  225. {aiq → nat}/tool/document_search.py +6 -6
  226. {aiq → nat}/tool/github_tools/create_github_commit.py +4 -4
  227. {aiq → nat}/tool/github_tools/create_github_issue.py +4 -4
  228. {aiq → nat}/tool/github_tools/create_github_pr.py +4 -4
  229. {aiq → nat}/tool/github_tools/get_github_file.py +4 -4
  230. {aiq → nat}/tool/github_tools/get_github_issue.py +4 -4
  231. {aiq → nat}/tool/github_tools/get_github_pr.py +4 -4
  232. {aiq → nat}/tool/github_tools/update_github_issue.py +4 -4
  233. {aiq → nat}/tool/mcp/exceptions.py +1 -1
  234. {aiq → nat}/tool/mcp/mcp_client.py +2 -2
  235. {aiq → nat}/tool/mcp/mcp_tool.py +7 -7
  236. {aiq → nat}/tool/memory_tools/add_memory_tool.py +6 -6
  237. {aiq → nat}/tool/memory_tools/delete_memory_tool.py +6 -6
  238. {aiq → nat}/tool/memory_tools/get_memory_tool.py +6 -6
  239. {aiq → nat}/tool/nvidia_rag.py +4 -4
  240. {aiq → nat}/tool/retriever.py +20 -15
  241. {aiq → nat}/tool/server_tools.py +16 -16
  242. {aiq → nat}/utils/dump_distro_mapping.py +2 -2
  243. {aiq → nat}/utils/exception_handlers/mcp.py +8 -8
  244. {aiq → nat}/utils/io/yaml_tools.py +1 -1
  245. {aiq → nat}/utils/metadata_utils.py +2 -2
  246. {aiq → nat}/utils/reactive/base/observable_base.py +2 -2
  247. {aiq → nat}/utils/reactive/base/subject_base.py +1 -1
  248. {aiq → nat}/utils/reactive/observable.py +5 -5
  249. {aiq → nat}/utils/reactive/observer.py +1 -1
  250. {aiq → nat}/utils/reactive/subject.py +4 -4
  251. {aiq → nat}/utils/reactive/subscription.py +1 -1
  252. {aiq → nat}/utils/settings/global_settings.py +4 -4
  253. {aiq → nat}/utils/type_converter.py +1 -1
  254. {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/METADATA +37 -37
  255. nvidia_nat-1.2.0rc7.dist-info/RECORD +434 -0
  256. nvidia_nat-1.2.0rc7.dist-info/entry_points.txt +21 -0
  257. nvidia_nat-1.2.0rc7.dist-info/top_level.txt +1 -0
  258. aiq/embedder/langchain_client.py +0 -41
  259. aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
  260. aiq/meta/module_to_distro.json +0 -3
  261. aiq/meta/pypi.md +0 -58
  262. nvidia_nat-1.2.0rc5.dist-info/RECORD +0 -435
  263. nvidia_nat-1.2.0rc5.dist-info/entry_points.txt +0 -20
  264. nvidia_nat-1.2.0rc5.dist-info/top_level.txt +0 -1
  265. {aiq → nat}/agent/__init__.py +0 -0
  266. {aiq → nat}/agent/base.py +0 -0
  267. {aiq → nat}/agent/dual_node.py +0 -0
  268. {aiq → nat}/agent/react_agent/__init__.py +0 -0
  269. {aiq → nat}/agent/react_agent/output_parser.py +0 -0
  270. {aiq → nat}/agent/react_agent/prompt.py +0 -0
  271. {aiq → nat}/agent/reasoning_agent/__init__.py +0 -0
  272. {aiq → nat}/agent/register.py +0 -0
  273. {aiq → nat}/agent/rewoo_agent/__init__.py +0 -0
  274. {aiq → nat}/agent/tool_calling_agent/__init__.py +0 -0
  275. {aiq → nat}/authentication/__init__.py +0 -0
  276. {aiq → nat}/authentication/api_key/__init__.py +0 -0
  277. {aiq → nat}/authentication/exceptions/__init__.py +0 -0
  278. {aiq → nat}/authentication/exceptions/api_key_exceptions.py +0 -0
  279. {aiq → nat}/authentication/http_basic_auth/__init__.py +0 -0
  280. {aiq → nat}/authentication/oauth2/__init__.py +0 -0
  281. {aiq → nat}/builder/__init__.py +0 -0
  282. {aiq → nat}/builder/framework_enum.py +0 -0
  283. {aiq → nat}/cli/__init__.py +0 -0
  284. {aiq → nat}/cli/cli_utils/__init__.py +0 -0
  285. {aiq → nat}/cli/commands/__init__.py +0 -0
  286. {aiq → nat}/cli/commands/configure/__init__.py +0 -0
  287. {aiq → nat}/cli/commands/configure/channel/__init__.py +0 -0
  288. {aiq → nat}/cli/commands/info/__init__.py +0 -0
  289. {aiq → nat}/cli/commands/registry/__init__.py +0 -0
  290. {aiq → nat}/cli/commands/sizing/__init__.py +0 -0
  291. {aiq → nat}/cli/commands/sizing/sizing.py +0 -0
  292. {aiq → nat}/cli/commands/workflow/__init__.py +0 -0
  293. {aiq → nat}/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  294. {aiq → nat}/cli/commands/workflow/templates/config.yml.j2 +0 -0
  295. {aiq → nat}/cli/commands/workflow/templates/register.py.j2 +0 -0
  296. {aiq → nat}/data_models/__init__.py +0 -0
  297. {aiq → nat}/data_models/common.py +0 -0
  298. {aiq → nat}/data_models/embedder.py +0 -0
  299. {aiq → nat}/data_models/evaluator.py +0 -0
  300. {aiq → nat}/data_models/front_end.py +0 -0
  301. {aiq → nat}/data_models/function.py +0 -0
  302. {aiq → nat}/data_models/function_dependencies.py +0 -0
  303. {aiq → nat}/data_models/interactive.py +0 -0
  304. {aiq → nat}/data_models/invocation_node.py +0 -0
  305. {aiq → nat}/data_models/llm.py +0 -0
  306. {aiq → nat}/data_models/logging.py +0 -0
  307. {aiq → nat}/data_models/memory.py +0 -0
  308. {aiq → nat}/data_models/object_store.py +0 -0
  309. {aiq → nat}/data_models/profiler.py +0 -0
  310. {aiq → nat}/data_models/registry_handler.py +0 -0
  311. {aiq → nat}/data_models/retry_mixin.py +0 -0
  312. {aiq → nat}/data_models/streaming.py +0 -0
  313. {aiq → nat}/data_models/swe_bench_model.py +0 -0
  314. {aiq → nat}/data_models/ttc_strategy.py +0 -0
  315. {aiq → nat}/embedder/__init__.py +0 -0
  316. {aiq → nat}/eval/__init__.py +0 -0
  317. {aiq → nat}/eval/dataset_handler/__init__.py +0 -0
  318. {aiq → nat}/eval/evaluator/__init__.py +0 -0
  319. {aiq → nat}/eval/rag_evaluator/__init__.py +0 -0
  320. {aiq → nat}/eval/register.py +0 -0
  321. {aiq → nat}/eval/runners/__init__.py +0 -0
  322. {aiq → nat}/eval/swe_bench_evaluator/__init__.py +0 -0
  323. {aiq → nat}/eval/trajectory_evaluator/__init__.py +0 -0
  324. {aiq → nat}/eval/tunable_rag_evaluator/__init__.py +0 -0
  325. {aiq → nat}/eval/usage_stats.py +0 -0
  326. {aiq → nat}/eval/utils/__init__.py +0 -0
  327. {aiq → nat}/eval/utils/tqdm_position_registry.py +0 -0
  328. {aiq → nat}/experimental/__init__.py +0 -0
  329. {aiq → nat}/experimental/decorators/__init__.py +0 -0
  330. {aiq → nat}/experimental/test_time_compute/__init__.py +0 -0
  331. {aiq → nat}/experimental/test_time_compute/editing/__init__.py +0 -0
  332. {aiq → nat}/experimental/test_time_compute/functions/__init__.py +0 -0
  333. {aiq → nat}/experimental/test_time_compute/models/__init__.py +0 -0
  334. {aiq → nat}/experimental/test_time_compute/models/stage_enums.py +0 -0
  335. {aiq → nat}/experimental/test_time_compute/models/tool_use_config.py +0 -0
  336. {aiq → nat}/experimental/test_time_compute/models/ttc_item.py +0 -0
  337. {aiq → nat}/experimental/test_time_compute/scoring/__init__.py +0 -0
  338. {aiq → nat}/experimental/test_time_compute/search/__init__.py +0 -0
  339. {aiq → nat}/experimental/test_time_compute/selection/__init__.py +0 -0
  340. {aiq → nat}/front_ends/__init__.py +0 -0
  341. {aiq → nat}/front_ends/console/__init__.py +0 -0
  342. {aiq → nat}/front_ends/cron/__init__.py +0 -0
  343. {aiq → nat}/front_ends/fastapi/__init__.py +0 -0
  344. {aiq → nat}/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  345. {aiq → nat}/front_ends/fastapi/fastapi_front_end_controller.py +0 -0
  346. {aiq → nat}/front_ends/fastapi/html_snippets/__init__.py +0 -0
  347. {aiq → nat}/front_ends/fastapi/html_snippets/auth_code_grant_success.py +0 -0
  348. {aiq → nat}/front_ends/fastapi/job_store.py +0 -0
  349. {aiq → nat}/front_ends/mcp/__init__.py +0 -0
  350. {aiq → nat}/front_ends/register.py +0 -0
  351. {aiq → nat}/front_ends/simple_base/__init__.py +0 -0
  352. {aiq → nat}/llm/__init__.py +0 -0
  353. {aiq → nat}/llm/register.py +0 -0
  354. {aiq → nat}/llm/utils/__init__.py +0 -0
  355. {aiq → nat}/llm/utils/env_config_value.py +0 -0
  356. {aiq → nat}/llm/utils/error.py +0 -0
  357. {aiq → nat}/memory/interfaces.py +0 -0
  358. {aiq → nat}/memory/models.py +0 -0
  359. {aiq → nat}/object_store/interfaces.py +0 -0
  360. {aiq → nat}/object_store/models.py +0 -0
  361. {aiq → nat}/object_store/register.py +0 -0
  362. {aiq → nat}/observability/__init__.py +0 -0
  363. {aiq → nat}/observability/exporter/__init__.py +0 -0
  364. {aiq → nat}/observability/mixin/__init__.py +0 -0
  365. {aiq → nat}/observability/mixin/batch_config_mixin.py +0 -0
  366. {aiq → nat}/observability/mixin/collector_config_mixin.py +0 -0
  367. {aiq → nat}/observability/mixin/file_mode.py +0 -0
  368. {aiq → nat}/observability/mixin/resource_conflict_mixin.py +0 -0
  369. {aiq → nat}/observability/mixin/serialize_mixin.py +0 -0
  370. {aiq → nat}/observability/mixin/type_introspection_mixin.py +0 -0
  371. {aiq → nat}/observability/processor/__init__.py +0 -0
  372. {aiq → nat}/observability/utils/__init__.py +0 -0
  373. {aiq → nat}/observability/utils/dict_utils.py +0 -0
  374. {aiq → nat}/observability/utils/time_utils.py +0 -0
  375. {aiq → nat}/plugins/.namespace +0 -0
  376. {aiq → nat}/profiler/__init__.py +0 -0
  377. {aiq → nat}/profiler/calc/__init__.py +0 -0
  378. {aiq → nat}/profiler/calc/data_models.py +0 -0
  379. {aiq → nat}/profiler/callbacks/__init__.py +0 -0
  380. {aiq → nat}/profiler/callbacks/base_callback_class.py +0 -0
  381. {aiq → nat}/profiler/callbacks/token_usage_base_model.py +0 -0
  382. {aiq → nat}/profiler/data_frame_row.py +0 -0
  383. {aiq → nat}/profiler/decorators/__init__.py +0 -0
  384. {aiq → nat}/profiler/forecasting/__init__.py +0 -0
  385. {aiq → nat}/profiler/forecasting/config.py +0 -0
  386. {aiq → nat}/profiler/forecasting/models/__init__.py +0 -0
  387. {aiq → nat}/profiler/forecasting/models/forecasting_base_model.py +0 -0
  388. {aiq → nat}/profiler/inference_metrics_model.py +0 -0
  389. {aiq → nat}/profiler/inference_optimization/__init__.py +0 -0
  390. {aiq → nat}/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  391. {aiq → nat}/profiler/inference_optimization/data_models.py +0 -0
  392. {aiq → nat}/profiler/inference_optimization/experimental/__init__.py +0 -0
  393. {aiq → nat}/registry_handlers/__init__.py +0 -0
  394. {aiq → nat}/registry_handlers/local/__init__.py +0 -0
  395. {aiq → nat}/registry_handlers/pypi/__init__.py +0 -0
  396. {aiq → nat}/registry_handlers/register.py +0 -0
  397. {aiq → nat}/registry_handlers/rest/__init__.py +0 -0
  398. {aiq → nat}/registry_handlers/schemas/__init__.py +0 -0
  399. {aiq → nat}/registry_handlers/schemas/headers.py +0 -0
  400. {aiq → nat}/registry_handlers/schemas/status.py +0 -0
  401. {aiq → nat}/retriever/__init__.py +0 -0
  402. {aiq → nat}/retriever/milvus/__init__.py +0 -0
  403. {aiq → nat}/retriever/nemo_retriever/__init__.py +0 -0
  404. {aiq → nat}/runtime/__init__.py +0 -0
  405. {aiq → nat}/settings/__init__.py +0 -0
  406. {aiq → nat}/test/.namespace +0 -0
  407. {aiq → nat}/tool/__init__.py +0 -0
  408. {aiq → nat}/tool/code_execution/__init__.py +0 -0
  409. {aiq → nat}/tool/code_execution/local_sandbox/.gitignore +0 -0
  410. {aiq → nat}/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -0
  411. {aiq → nat}/tool/code_execution/local_sandbox/__init__.py +0 -0
  412. {aiq → nat}/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -0
  413. {aiq → nat}/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -0
  414. {aiq → nat}/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -0
  415. {aiq → nat}/tool/code_execution/utils.py +0 -0
  416. {aiq → nat}/tool/github_tools/__init__.py +0 -0
  417. {aiq → nat}/tool/mcp/__init__.py +0 -0
  418. {aiq → nat}/tool/memory_tools/__init__.py +0 -0
  419. {aiq → nat}/tool/register.py +0 -0
  420. {aiq → nat}/utils/__init__.py +0 -0
  421. {aiq → nat}/utils/data_models/__init__.py +0 -0
  422. {aiq → nat}/utils/data_models/schema_validator.py +0 -0
  423. {aiq → nat}/utils/debugging_utils.py +0 -0
  424. {aiq → nat}/utils/exception_handlers/__init__.py +0 -0
  425. {aiq → nat}/utils/exception_handlers/automatic_retries.py +0 -0
  426. {aiq → nat}/utils/exception_handlers/schemas.py +0 -0
  427. {aiq → nat}/utils/io/__init__.py +0 -0
  428. {aiq → nat}/utils/io/model_processing.py +0 -0
  429. {aiq → nat}/utils/log_utils.py +0 -0
  430. {aiq → nat}/utils/optional_imports.py +0 -0
  431. {aiq → nat}/utils/producer_consumer_queue.py +0 -0
  432. {aiq → nat}/utils/reactive/__init__.py +0 -0
  433. {aiq → nat}/utils/reactive/base/__init__.py +0 -0
  434. {aiq → nat}/utils/reactive/base/observer_base.py +0 -0
  435. {aiq → nat}/utils/settings/__init__.py +0 -0
  436. {aiq → nat}/utils/string_utils.py +0 -0
  437. {aiq → nat}/utils/type_utils.py +0 -0
  438. {aiq → nat}/utils/url_utils.py +0 -0
  439. {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/WHEEL +0 -0
  440. {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  441. {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/licenses/LICENSE.md +0 -0
@@ -15,17 +15,19 @@
15
15
 
16
16
  import json
17
17
  import math
18
+ from pathlib import Path
18
19
 
19
20
  import pandas as pd
20
21
 
21
- from aiq.data_models.dataset_handler import EvalDatasetConfig
22
- from aiq.data_models.dataset_handler import EvalDatasetJsonConfig
23
- from aiq.data_models.intermediate_step import IntermediateStep
24
- from aiq.data_models.intermediate_step import IntermediateStepType
25
- from aiq.eval.dataset_handler.dataset_downloader import DatasetDownloader
26
- from aiq.eval.dataset_handler.dataset_filter import DatasetFilter
27
- from aiq.eval.evaluator.evaluator_model import EvalInput
28
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
22
+ from nat.data_models.dataset_handler import EvalDatasetConfig
23
+ from nat.data_models.dataset_handler import EvalDatasetCustomConfig
24
+ from nat.data_models.dataset_handler import EvalDatasetJsonConfig
25
+ from nat.data_models.intermediate_step import IntermediateStep
26
+ from nat.data_models.intermediate_step import IntermediateStepType
27
+ from nat.eval.dataset_handler.dataset_downloader import DatasetDownloader
28
+ from nat.eval.dataset_handler.dataset_filter import DatasetFilter
29
+ from nat.eval.evaluator.evaluator_model import EvalInput
30
+ from nat.eval.evaluator.evaluator_model import EvalInputItem
29
31
 
30
32
 
31
33
  class DatasetHandler:
@@ -38,9 +40,9 @@ class DatasetHandler:
38
40
  dataset_config: EvalDatasetConfig,
39
41
  reps: int,
40
42
  concurrency: int,
41
- num_passes: int | None = None,
43
+ num_passes: int = 1,
42
44
  adjust_dataset_size: bool = False):
43
- from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
45
+ from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
44
46
 
45
47
  self.dataset_config = dataset_config
46
48
  self.dataset_filter = DatasetFilter(dataset_config.filter)
@@ -184,6 +186,10 @@ class DatasetHandler:
184
186
  # if a dataset file has been provided in the command line, use that
185
187
  dataset_config = EvalDatasetJsonConfig(file_path=dataset) if dataset else self.dataset_config
186
188
 
189
+ # Handle custom dataset type with special processing
190
+ if isinstance(self.dataset_config, EvalDatasetCustomConfig):
191
+ return self._handle_custom_dataset(dataset)
192
+
187
193
  # Download the dataset if it is remote
188
194
  downloader = DatasetDownloader(dataset_config=dataset_config)
189
195
  downloader.download_dataset()
@@ -192,6 +198,19 @@ class DatasetHandler:
192
198
  # Parse the dataset into a DataFrame
193
199
  input_df = parser(dataset_config.file_path, **kwargs)
194
200
 
201
+ # Apply standard preprocessing and convert to EvalInput
202
+ return self._preprocess_eval_dataframe(input_df)
203
+
204
+ def _preprocess_dataframe(self, input_df: pd.DataFrame) -> pd.DataFrame:
205
+ """
206
+ Apply standard preprocessing to a DataFrame: filters, deduplication, repetitions, and size adjustment.
207
+
208
+ Args:
209
+ input_df: DataFrame to preprocess
210
+
211
+ Returns:
212
+ Preprocessed DataFrame
213
+ """
195
214
  # Apply filters and deduplicate
196
215
  input_df = self.dataset_filter.apply_filters(input_df)
197
216
  input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
@@ -205,12 +224,104 @@ class DatasetHandler:
205
224
  elif self.adjust_dataset_size:
206
225
  input_df = self.adjust_dataset(input_df)
207
226
 
208
- # Convert the DataFrame to a list of EvalInput objects
209
- return self.get_eval_input_from_df(input_df)
227
+ return input_df
228
+
229
+ def _preprocess_eval_dataframe(self, input_df: pd.DataFrame) -> EvalInput:
230
+ """
231
+ Apply standard preprocessing to a DataFrame and convert to EvalInput.
232
+
233
+ Args:
234
+ input_df: DataFrame to preprocess
235
+
236
+ Returns:
237
+ Preprocessed EvalInput object
238
+ """
239
+ processed_df = self._preprocess_dataframe(input_df)
240
+ return self.get_eval_input_from_df(processed_df)
241
+
242
+ def _preprocess_eval_input(self, eval_input: EvalInput) -> EvalInput:
243
+ """
244
+ Apply standard preprocessing to an EvalInput object.
245
+
246
+ Thin wrapper that converts EvalInput to DataFrame, processes it, and converts back.
247
+
248
+ Args:
249
+ eval_input: EvalInput object to preprocess
250
+
251
+ Returns:
252
+ Preprocessed EvalInput object
253
+ """
254
+ if not eval_input.eval_input_items:
255
+ return eval_input
256
+
257
+ input_df = self._eval_input_to_dataframe(eval_input)
258
+ return self._preprocess_eval_dataframe(input_df)
259
+
260
+ def _handle_custom_dataset(self, dataset: str | None) -> EvalInput:
261
+ """
262
+ Handle custom dataset type by calling the user-defined function
263
+ and applying standard preprocessing to the result.
264
+
265
+ Args:
266
+ dataset: Optional dataset file path from command line
267
+
268
+ Returns:
269
+ Preprocessed EvalInput object
270
+ """
271
+ # Determine input path - use command line dataset or config file_path
272
+ input_path = Path(dataset) if dataset else Path(self.dataset_config.file_path)
273
+
274
+ # Download the dataset if it is remote (for custom datasets too)
275
+ downloader = DatasetDownloader(dataset_config=self.dataset_config)
276
+ downloader.download_dataset()
277
+
278
+ # Load and call custom function
279
+ custom_function, kwargs = self.dataset_config.parser()
280
+
281
+ try:
282
+ # Call the custom function with file_path and kwargs
283
+ eval_input = custom_function(file_path=input_path, **kwargs)
284
+
285
+ if not isinstance(eval_input, EvalInput):
286
+ raise ValueError(f"Custom function must return an EvalInput object, "
287
+ f"but returned {type(eval_input)}")
288
+
289
+ except Exception as e:
290
+ raise RuntimeError(f"Error calling custom dataset function: {e}") from e
291
+
292
+ # Apply standard preprocessing (filters, deduplication, repetitions)
293
+ return self._preprocess_eval_input(eval_input)
294
+
295
+ def _eval_input_to_dataframe(self, eval_input: EvalInput) -> pd.DataFrame:
296
+ """
297
+ Convert an EvalInput object to a pandas DataFrame for processing.
298
+
299
+ Args:
300
+ eval_input: EvalInput object to convert
301
+
302
+ Returns:
303
+ DataFrame representation of the EvalInput
304
+ """
305
+ data = []
306
+ for item in eval_input.eval_input_items:
307
+ row = item.full_dataset_entry.copy() if item.full_dataset_entry else {}
308
+
309
+ # Ensure key fields are present
310
+ row[self.id_key] = item.id
311
+ if self.is_structured_input():
312
+ row[self.question_key] = item.input_obj
313
+ row[self.answer_key] = item.expected_output_obj
314
+ row[self.generated_answer_key] = item.output_obj
315
+ row[self.trajectory_key] = item.trajectory
316
+ row[self.expected_trajectory_key] = item.expected_trajectory
317
+
318
+ data.append(row)
319
+
320
+ return pd.DataFrame(data)
210
321
 
211
322
  def filter_intermediate_steps(self,
212
323
  intermediate_steps: list[IntermediateStep],
213
- event_filter: list[IntermediateStepType] = None) -> list[dict]:
324
+ event_filter: list[IntermediateStepType] | None = None) -> list[dict]:
214
325
  """
215
326
  Filter out the intermediate steps that are not relevant for evaluation.
216
327
  The output is written with with the intention of re-running the evaluation using the original config file.
@@ -220,7 +331,9 @@ class DatasetHandler:
220
331
  filtered_steps = self.intermediate_step_adapter.filter_intermediate_steps(intermediate_steps, event_filter)
221
332
  return self.intermediate_step_adapter.serialize_intermediate_steps(filtered_steps)
222
333
 
223
- def publish_eval_input(self, eval_input, workflow_output_step_filter: list[IntermediateStepType] = None) -> str:
334
+ def publish_eval_input(self,
335
+ eval_input,
336
+ workflow_output_step_filter: list[IntermediateStepType] | None = None) -> str:
224
337
  """
225
338
  Convert the EvalInput object to a JSON output for storing in a file. Use the orginal keys to
226
339
  allow re-running evaluation using the orignal config file and '--skip_workflow' option.
@@ -23,21 +23,21 @@ from uuid import uuid4
23
23
  from pydantic import BaseModel
24
24
  from tqdm import tqdm
25
25
 
26
- from aiq.data_models.evaluate import EvalConfig
27
- from aiq.data_models.evaluate import JobEvictionPolicy
28
- from aiq.eval.config import EvaluationRunConfig
29
- from aiq.eval.config import EvaluationRunOutput
30
- from aiq.eval.dataset_handler.dataset_handler import DatasetHandler
31
- from aiq.eval.evaluator.evaluator_model import EvalInput
32
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
33
- from aiq.eval.evaluator.evaluator_model import EvalOutput
34
- from aiq.eval.usage_stats import UsageStats
35
- from aiq.eval.usage_stats import UsageStatsItem
36
- from aiq.eval.usage_stats import UsageStatsLLM
37
- from aiq.eval.utils.output_uploader import OutputUploader
38
- from aiq.eval.utils.weave_eval import WeaveEvaluationIntegration
39
- from aiq.profiler.data_models import ProfilerResults
40
- from aiq.runtime.session import AIQSessionManager
26
+ from nat.data_models.evaluate import EvalConfig
27
+ from nat.data_models.evaluate import JobEvictionPolicy
28
+ from nat.eval.config import EvaluationRunConfig
29
+ from nat.eval.config import EvaluationRunOutput
30
+ from nat.eval.dataset_handler.dataset_handler import DatasetHandler
31
+ from nat.eval.evaluator.evaluator_model import EvalInput
32
+ from nat.eval.evaluator.evaluator_model import EvalInputItem
33
+ from nat.eval.evaluator.evaluator_model import EvalOutput
34
+ from nat.eval.usage_stats import UsageStats
35
+ from nat.eval.usage_stats import UsageStatsItem
36
+ from nat.eval.usage_stats import UsageStatsLLM
37
+ from nat.eval.utils.output_uploader import OutputUploader
38
+ from nat.eval.utils.weave_eval import WeaveEvaluationIntegration
39
+ from nat.profiler.data_models import ProfilerResults
40
+ from nat.runtime.session import SessionManager
41
41
 
42
42
  logger = logging.getLogger(__name__)
43
43
 
@@ -45,13 +45,17 @@ logger = logging.getLogger(__name__)
45
45
  class EvaluationRun: # pylint: disable=too-many-public-methods
46
46
  """
47
47
  Instantiated for each evaluation run and used to store data for that single run.
48
+
49
+ .. warning::
50
+ **Experimental Feature**: The Evaluation API is experimental and may change in future releases.
51
+ Future versions may introduce breaking changes without notice.
48
52
  """
49
53
 
50
54
  def __init__(self, config: EvaluationRunConfig):
51
55
  """
52
56
  Initialize an EvaluationRun with configuration.
53
57
  """
54
- from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
58
+ from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
55
59
 
56
60
  # Run-specific configuration
57
61
  self.config: EvaluationRunConfig = config
@@ -79,7 +83,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
79
83
  def _compute_usage_stats(self, item: EvalInputItem):
80
84
  """Compute usage stats for a single item using the intermediate steps"""
81
85
  # get the prompt and completion tokens from the intermediate steps
82
- from aiq.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
86
+ from nat.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
83
87
  steps = [IntermediatePropertyAdaptor.from_intermediate_step(step) for step in item.trajectory]
84
88
  usage_stats_per_llm = {}
85
89
  total_tokens = 0
@@ -129,14 +133,14 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
129
133
  llm_latency=llm_latency)
130
134
  return self.usage_stats.usage_stats_items[item.id]
131
135
 
132
- async def run_workflow_local(self, session_manager: AIQSessionManager):
136
+ async def run_workflow_local(self, session_manager: SessionManager):
133
137
  '''
134
138
  Launch the workflow with the specified questions and extract the output using the jsonpath
135
139
  '''
136
140
  # import function level dependencies
137
141
  from jsonpath_ng import parse
138
142
 
139
- from aiq.eval.runtime_event_subscriber import pull_intermediate
143
+ from nat.eval.runtime_event_subscriber import pull_intermediate
140
144
 
141
145
  # Run the workflow
142
146
  jsonpath_expr = parse(self.config.result_json_path)
@@ -220,7 +224,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
220
224
  pbar.close()
221
225
 
222
226
  async def run_workflow_remote(self):
223
- from aiq.eval.remote_workflow import EvaluationRemoteWorkflowHandler
227
+ from nat.eval.remote_workflow import EvaluationRemoteWorkflowHandler
224
228
  handler = EvaluationRemoteWorkflowHandler(self.config, self.eval_config.general.max_concurrency)
225
229
  await handler.run_workflow_remote(self.eval_input)
226
230
  for item in self.eval_input.eval_input_items:
@@ -237,7 +241,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
237
241
  logger.info("Profiler is not enabled. Skipping profiling.")
238
242
  return ProfilerResults()
239
243
 
240
- from aiq.profiler.profile_runner import ProfilerRunner
244
+ from nat.profiler.profile_runner import ProfilerRunner
241
245
 
242
246
  all_stats = []
243
247
  for input_item in self.eval_input.eval_input_items:
@@ -306,7 +310,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
306
310
  except Exception as e:
307
311
  logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e, exc_info=True)
308
312
 
309
- def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
313
+ def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults): # pylint: disable=unused-argument # noqa: E501
310
314
  workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
311
315
  workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
312
316
 
@@ -374,17 +378,17 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
374
378
  await self.weave_eval.afinish_loggers()
375
379
 
376
380
  def apply_overrides(self):
377
- from aiq.cli.cli_utils.config_override import load_and_override_config
378
- from aiq.data_models.config import AIQConfig
379
- from aiq.runtime.loader import PluginTypes
380
- from aiq.runtime.loader import discover_and_register_plugins
381
- from aiq.utils.data_models.schema_validator import validate_schema
381
+ from nat.cli.cli_utils.config_override import load_and_override_config
382
+ from nat.data_models.config import Config
383
+ from nat.runtime.loader import PluginTypes
384
+ from nat.runtime.loader import discover_and_register_plugins
385
+ from nat.utils.data_models.schema_validator import validate_schema
382
386
 
383
387
  # Register plugins before validation
384
388
  discover_and_register_plugins(PluginTypes.CONFIG_OBJECT)
385
389
 
386
390
  config_dict = load_and_override_config(self.config.config_file, self.config.override)
387
- config = validate_schema(config_dict, AIQConfig)
391
+ config = validate_schema(config_dict, Config)
388
392
  return config
389
393
 
390
394
  def _get_workflow_alias(self, workflow_type: str | None = None):
@@ -393,20 +397,20 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
393
397
  return self.eval_config.general.workflow_alias
394
398
 
395
399
  if not workflow_type or workflow_type == "EmptyFunctionConfig":
396
- return "aiqtoolkit-eval"
400
+ return "nat-eval"
397
401
 
398
402
  return workflow_type
399
403
 
400
404
  async def run_and_evaluate(self,
401
- session_manager: AIQSessionManager | None = None,
405
+ session_manager: SessionManager | None = None,
402
406
  job_id: str | None = None) -> EvaluationRunOutput:
403
407
  """
404
408
  Run the workflow with the specified config file and evaluate the dataset
405
409
  """
406
410
  logger.info("Starting evaluation run with config file: %s", self.config.config_file)
407
411
 
408
- from aiq.builder.eval_builder import WorkflowEvalBuilder
409
- from aiq.runtime.loader import load_config
412
+ from nat.builder.eval_builder import WorkflowEvalBuilder
413
+ from nat.runtime.loader import load_config
410
414
 
411
415
  # Load and override the config
412
416
  if self.config.override:
@@ -469,8 +473,8 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
469
473
  else:
470
474
  if not self.config.skip_workflow:
471
475
  if session_manager is None:
472
- session_manager = AIQSessionManager(eval_workflow.build(),
473
- max_concurrency=self.eval_config.general.max_concurrency)
476
+ session_manager = SessionManager(eval_workflow.build(),
477
+ max_concurrency=self.eval_config.general.max_concurrency)
474
478
  await self.run_workflow_local(session_manager)
475
479
 
476
480
  # Evaluate
@@ -19,17 +19,21 @@ from abc import abstractmethod
19
19
 
20
20
  from tqdm import tqdm
21
21
 
22
- from aiq.eval.evaluator.evaluator_model import EvalInput
23
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
24
- from aiq.eval.evaluator.evaluator_model import EvalOutput
25
- from aiq.eval.evaluator.evaluator_model import EvalOutputItem
26
- from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
22
+ from nat.eval.evaluator.evaluator_model import EvalInput
23
+ from nat.eval.evaluator.evaluator_model import EvalInputItem
24
+ from nat.eval.evaluator.evaluator_model import EvalOutput
25
+ from nat.eval.evaluator.evaluator_model import EvalOutputItem
26
+ from nat.eval.utils.tqdm_position_registry import TqdmPositionRegistry
27
27
 
28
28
 
29
29
  class BaseEvaluator(ABC):
30
30
  """
31
31
  Base class for custom evaluators.
32
32
 
33
+ .. warning::
34
+ **Experimental Feature**: The Evaluation API is experimental and may change in future releases.
35
+ Future versions may introduce breaking changes without notice.
36
+
33
37
  Each custom evaluator must implement the `evaluate_item` method which is used to evaluate a
34
38
  single EvalInputItem.
35
39
  """
@@ -17,16 +17,16 @@ import typing
17
17
 
18
18
  from pydantic import BaseModel
19
19
 
20
- from aiq.data_models.intermediate_step import IntermediateStep
20
+ from nat.data_models.intermediate_step import IntermediateStep
21
21
 
22
22
 
23
23
  class EvalInputItem(BaseModel):
24
24
  id: typing.Any
25
25
  input_obj: typing.Any
26
26
  expected_output_obj: typing.Any
27
- output_obj: typing.Any
28
- expected_trajectory: list[IntermediateStep]
29
- trajectory: list[IntermediateStep]
27
+ output_obj: typing.Any = None # populated by the workflow
28
+ expected_trajectory: list[IntermediateStep] = []
29
+ trajectory: list[IntermediateStep] = [] # populated by the workflow
30
30
  full_dataset_entry: typing.Any
31
31
 
32
32
 
@@ -17,8 +17,8 @@ import logging
17
17
 
18
18
  from langchain_core.agents import AgentAction
19
19
 
20
- from aiq.data_models.intermediate_step import IntermediateStep
21
- from aiq.data_models.intermediate_step import IntermediateStepType
20
+ from nat.data_models.intermediate_step import IntermediateStep
21
+ from nat.data_models.intermediate_step import IntermediateStepType
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
@@ -25,12 +25,12 @@ from ragas.llms import LangchainLLMWrapper
25
25
  from ragas.metrics import Metric
26
26
  from tqdm import tqdm
27
27
 
28
- from aiq.data_models.intermediate_step import IntermediateStepType
29
- from aiq.eval.evaluator.evaluator_model import EvalInput
30
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
31
- from aiq.eval.evaluator.evaluator_model import EvalOutput
32
- from aiq.eval.evaluator.evaluator_model import EvalOutputItem
33
- from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
28
+ from nat.data_models.intermediate_step import IntermediateStepType
29
+ from nat.eval.evaluator.evaluator_model import EvalInput
30
+ from nat.eval.evaluator.evaluator_model import EvalInputItem
31
+ from nat.eval.evaluator.evaluator_model import EvalOutput
32
+ from nat.eval.evaluator.evaluator_model import EvalOutputItem
33
+ from nat.eval.utils.tqdm_position_registry import TqdmPositionRegistry
34
34
 
35
35
  logger = logging.getLogger(__name__)
36
36
 
@@ -68,7 +68,7 @@ class RAGEvaluator:
68
68
 
69
69
  def eval_input_to_ragas(self, eval_input: EvalInput) -> EvaluationDataset:
70
70
  """Converts EvalInput into a Ragas-compatible EvaluationDataset."""
71
- from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
71
+ from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
72
72
  event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
73
73
  samples = []
74
74
 
@@ -99,7 +99,7 @@ class RAGEvaluator:
99
99
  return EvaluationDataset(samples=samples)
100
100
 
101
101
  def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: EvaluationResult | None) -> EvalOutput:
102
- """Converts the ragas EvaluationResult to aiq EvalOutput"""
102
+ """Converts the ragas EvaluationResult to nat EvalOutput"""
103
103
 
104
104
  if not results_dataset:
105
105
  logger.error("Ragas evaluation failed with no results")
@@ -19,13 +19,13 @@ from pydantic import BaseModel
19
19
  from pydantic import Field
20
20
  from pydantic import model_validator
21
21
 
22
- from aiq.builder.builder import EvalBuilder
23
- from aiq.builder.evaluator import EvaluatorInfo
24
- from aiq.builder.framework_enum import LLMFrameworkEnum
25
- from aiq.cli.register_workflow import register_evaluator
26
- from aiq.data_models.evaluator import EvaluatorBaseConfig
27
- from aiq.eval.evaluator.evaluator_model import EvalInput
28
- from aiq.eval.evaluator.evaluator_model import EvalOutput
22
+ from nat.builder.builder import EvalBuilder
23
+ from nat.builder.evaluator import EvaluatorInfo
24
+ from nat.builder.framework_enum import LLMFrameworkEnum
25
+ from nat.cli.register_workflow import register_evaluator
26
+ from nat.data_models.evaluator import EvaluatorBaseConfig
27
+ from nat.eval.evaluator.evaluator_model import EvalInput
28
+ from nat.eval.evaluator.evaluator_model import EvalOutput
29
29
 
30
30
  logger = logging.getLogger(__name__)
31
31
 
@@ -21,13 +21,13 @@ import aiohttp
21
21
  from pydantic import ValidationError
22
22
  from tqdm import tqdm
23
23
 
24
- from aiq.data_models.api_server import AIQResponseIntermediateStep
25
- from aiq.data_models.intermediate_step import IntermediateStep
26
- from aiq.data_models.intermediate_step import IntermediateStepPayload
27
- from aiq.data_models.invocation_node import InvocationNode
28
- from aiq.eval.config import EvaluationRunConfig
29
- from aiq.eval.evaluator.evaluator_model import EvalInput
30
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
24
+ from nat.data_models.api_server import ResponseIntermediateStep
25
+ from nat.data_models.intermediate_step import IntermediateStep
26
+ from nat.data_models.intermediate_step import IntermediateStepPayload
27
+ from nat.data_models.invocation_node import InvocationNode
28
+ from nat.eval.config import EvaluationRunConfig
29
+ from nat.eval.evaluator.evaluator_model import EvalInput
30
+ from nat.eval.evaluator.evaluator_model import EvalInputItem
31
31
 
32
32
  logger = logging.getLogger(__name__)
33
33
 
@@ -80,7 +80,7 @@ class EvaluationRemoteWorkflowHandler:
80
80
  # This is an intermediate step
81
81
  try:
82
82
  step_data = json.loads(line[len(INTERMEDIATE_DATA_PREFIX):])
83
- response_intermediate = AIQResponseIntermediateStep.model_validate(step_data)
83
+ response_intermediate = ResponseIntermediateStep.model_validate(step_data)
84
84
  # The payload is expected to be IntermediateStepPayload
85
85
  payload = IntermediateStepPayload.model_validate_json(response_intermediate.payload)
86
86
  intermediate_step = IntermediateStep(parent_id="remote",
@@ -17,8 +17,8 @@ import typing
17
17
 
18
18
  from pydantic import BaseModel
19
19
 
20
- from aiq.eval.config import EvaluationRunConfig
21
- from aiq.eval.config import EvaluationRunOutput
20
+ from nat.eval.config import EvaluationRunConfig
21
+ from nat.eval.config import EvaluationRunOutput
22
22
 
23
23
 
24
24
  class MultiEvaluationRunConfig(BaseModel):
@@ -16,10 +16,10 @@
16
16
  import copy
17
17
  import typing
18
18
 
19
- from aiq.eval.config import EvaluationRunConfig
20
- from aiq.eval.config import EvaluationRunOutput
21
- from aiq.eval.evaluate import EvaluationRun
22
- from aiq.eval.runners.config import MultiEvaluationRunConfig
19
+ from nat.eval.config import EvaluationRunConfig
20
+ from nat.eval.config import EvaluationRunOutput
21
+ from nat.eval.evaluate import EvaluationRun
22
+ from nat.eval.runners.config import MultiEvaluationRunConfig
23
23
 
24
24
 
25
25
  class MultiEvaluationRunner:
@@ -16,8 +16,8 @@
16
16
  import asyncio
17
17
  import logging
18
18
 
19
- from aiq.builder.context import AIQContext
20
- from aiq.data_models.intermediate_step import IntermediateStep
19
+ from nat.builder.context import Context
20
+ from nat.data_models.intermediate_step import IntermediateStep
21
21
 
22
22
  logger = logging.getLogger(__name__)
23
23
 
@@ -30,7 +30,7 @@ def pull_intermediate() -> asyncio.Future[list[dict]]:
30
30
  """
31
31
  future = asyncio.Future()
32
32
  intermediate_steps = [] # We'll store the dumped steps here.
33
- context = AIQContext.get()
33
+ context = Context.get()
34
34
 
35
35
  def on_next_cb(item: IntermediateStep):
36
36
  # Append each new intermediate step (dumped to dict) to the list.
@@ -19,10 +19,10 @@ import os
19
19
  import shutil
20
20
  from pathlib import Path
21
21
 
22
- from aiq.data_models.swe_bench_model import SWEBenchInput
23
- from aiq.data_models.swe_bench_model import SWEBenchOutput
24
- from aiq.eval.evaluator.evaluator_model import EvalInput
25
- from aiq.eval.evaluator.evaluator_model import EvalOutput
22
+ from nat.data_models.swe_bench_model import SWEBenchInput
23
+ from nat.data_models.swe_bench_model import SWEBenchOutput
24
+ from nat.eval.evaluator.evaluator_model import EvalInput
25
+ from nat.eval.evaluator.evaluator_model import EvalOutput
26
26
 
27
27
  try:
28
28
  import swebench.harness.run_evaluation as swebench_eval
@@ -123,7 +123,7 @@ class SweBenchEvaluator:
123
123
  for s in swebench_inputs if s not in supported_inputs})
124
124
 
125
125
  # Write SWEBenchInput to file
126
- workflow_input_file = self.output_dir / "aiq_workflow_input.json"
126
+ workflow_input_file = self.output_dir / "nat_workflow_input.json"
127
127
  workflow_input_file.parent.mkdir(parents=True, exist_ok=True)
128
128
  Path(workflow_input_file).write_text(json.dumps([swebench.model_dump() for swebench in supported_inputs],
129
129
  indent=2),
@@ -139,7 +139,7 @@ class SweBenchEvaluator:
139
139
  return None, None
140
140
 
141
141
  # Write SWEBenchOutput to file
142
- workflow_output_file = self.output_dir / "aiq_workflow_output.json"
142
+ workflow_output_file = self.output_dir / "nat_workflow_output.json"
143
143
  Path(workflow_output_file).write_text(json.dumps([output.model_dump() for output in filtered_outputs],
144
144
  indent=2),
145
145
  encoding="utf-8")
@@ -15,10 +15,10 @@
15
15
 
16
16
  from pydantic import Field
17
17
 
18
- from aiq.builder.builder import EvalBuilder
19
- from aiq.builder.evaluator import EvaluatorInfo
20
- from aiq.cli.register_workflow import register_evaluator
21
- from aiq.data_models.evaluator import EvaluatorBaseConfig
18
+ from nat.builder.builder import EvalBuilder
19
+ from nat.builder.evaluator import EvaluatorInfo
20
+ from nat.cli.register_workflow import register_evaluator
21
+ from nat.data_models.evaluator import EvaluatorBaseConfig
22
22
 
23
23
 
24
24
  class SweBenchEvaluatorConfig(EvaluatorBaseConfig, name="swe_bench"):
@@ -19,9 +19,9 @@ from langchain.evaluation import TrajectoryEvalChain
19
19
  from langchain_core.language_models import BaseChatModel
20
20
  from langchain_core.tools import BaseTool
21
21
 
22
- from aiq.eval.evaluator.base_evaluator import BaseEvaluator
23
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
24
- from aiq.eval.evaluator.evaluator_model import EvalOutputItem
22
+ from nat.eval.evaluator.base_evaluator import BaseEvaluator
23
+ from nat.eval.evaluator.evaluator_model import EvalInputItem
24
+ from nat.eval.evaluator.evaluator_model import EvalOutputItem
25
25
 
26
26
  logger = logging.getLogger(__name__)
27
27
 
@@ -48,8 +48,8 @@ class TrajectoryEvaluator(BaseEvaluator):
48
48
  """
49
49
  Evaluate a single EvalInputItem and return an EvalOutputItem.
50
50
  """
51
- from aiq.data_models.intermediate_step import IntermediateStepType
52
- from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
51
+ from nat.data_models.intermediate_step import IntermediateStepType
52
+ from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
53
53
 
54
54
  intermediate_step_adapter = IntermediateStepAdapter()
55
55
  event_filter = [IntermediateStepType.LLM_END, IntermediateStepType.TOOL_END]
@@ -15,10 +15,10 @@
15
15
 
16
16
  from pydantic import Field
17
17
 
18
- from aiq.builder.builder import EvalBuilder
19
- from aiq.builder.evaluator import EvaluatorInfo
20
- from aiq.cli.register_workflow import register_evaluator
21
- from aiq.data_models.evaluator import EvaluatorBaseConfig
18
+ from nat.builder.builder import EvalBuilder
19
+ from nat.builder.evaluator import EvaluatorInfo
20
+ from nat.cli.register_workflow import register_evaluator
21
+ from nat.data_models.evaluator import EvaluatorBaseConfig
22
22
 
23
23
 
24
24
  class TrajectoryEvaluatorConfig(EvaluatorBaseConfig, name="trajectory"):
@@ -29,7 +29,7 @@ class TrajectoryEvaluatorConfig(EvaluatorBaseConfig, name="trajectory"):
29
29
 
30
30
  @register_evaluator(config_type=TrajectoryEvaluatorConfig)
31
31
  async def register_trajectory_evaluator(config: TrajectoryEvaluatorConfig, builder: EvalBuilder):
32
- from aiq.builder.framework_enum import LLMFrameworkEnum
32
+ from nat.builder.framework_enum import LLMFrameworkEnum
33
33
 
34
34
  from .evaluate import TrajectoryEvaluator
35
35
  llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)