nvidia-nat 1.2.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. aiq/agent/__init__.py +0 -0
  2. aiq/agent/base.py +239 -0
  3. aiq/agent/dual_node.py +67 -0
  4. aiq/agent/react_agent/__init__.py +0 -0
  5. aiq/agent/react_agent/agent.py +355 -0
  6. aiq/agent/react_agent/output_parser.py +104 -0
  7. aiq/agent/react_agent/prompt.py +41 -0
  8. aiq/agent/react_agent/register.py +149 -0
  9. aiq/agent/reasoning_agent/__init__.py +0 -0
  10. aiq/agent/reasoning_agent/reasoning_agent.py +225 -0
  11. aiq/agent/register.py +23 -0
  12. aiq/agent/rewoo_agent/__init__.py +0 -0
  13. aiq/agent/rewoo_agent/agent.py +411 -0
  14. aiq/agent/rewoo_agent/prompt.py +108 -0
  15. aiq/agent/rewoo_agent/register.py +158 -0
  16. aiq/agent/tool_calling_agent/__init__.py +0 -0
  17. aiq/agent/tool_calling_agent/agent.py +119 -0
  18. aiq/agent/tool_calling_agent/register.py +106 -0
  19. aiq/authentication/__init__.py +14 -0
  20. aiq/authentication/api_key/__init__.py +14 -0
  21. aiq/authentication/api_key/api_key_auth_provider.py +96 -0
  22. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  23. aiq/authentication/api_key/register.py +26 -0
  24. aiq/authentication/exceptions/__init__.py +14 -0
  25. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  26. aiq/authentication/http_basic_auth/__init__.py +0 -0
  27. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  28. aiq/authentication/http_basic_auth/register.py +30 -0
  29. aiq/authentication/interfaces.py +93 -0
  30. aiq/authentication/oauth2/__init__.py +14 -0
  31. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  32. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  33. aiq/authentication/oauth2/register.py +25 -0
  34. aiq/authentication/register.py +21 -0
  35. aiq/builder/__init__.py +0 -0
  36. aiq/builder/builder.py +285 -0
  37. aiq/builder/component_utils.py +316 -0
  38. aiq/builder/context.py +264 -0
  39. aiq/builder/embedder.py +24 -0
  40. aiq/builder/eval_builder.py +161 -0
  41. aiq/builder/evaluator.py +29 -0
  42. aiq/builder/framework_enum.py +24 -0
  43. aiq/builder/front_end.py +73 -0
  44. aiq/builder/function.py +344 -0
  45. aiq/builder/function_base.py +380 -0
  46. aiq/builder/function_info.py +627 -0
  47. aiq/builder/intermediate_step_manager.py +174 -0
  48. aiq/builder/llm.py +25 -0
  49. aiq/builder/retriever.py +25 -0
  50. aiq/builder/user_interaction_manager.py +74 -0
  51. aiq/builder/workflow.py +148 -0
  52. aiq/builder/workflow_builder.py +1117 -0
  53. aiq/cli/__init__.py +14 -0
  54. aiq/cli/cli_utils/__init__.py +0 -0
  55. aiq/cli/cli_utils/config_override.py +231 -0
  56. aiq/cli/cli_utils/validation.py +37 -0
  57. aiq/cli/commands/__init__.py +0 -0
  58. aiq/cli/commands/configure/__init__.py +0 -0
  59. aiq/cli/commands/configure/channel/__init__.py +0 -0
  60. aiq/cli/commands/configure/channel/add.py +28 -0
  61. aiq/cli/commands/configure/channel/channel.py +36 -0
  62. aiq/cli/commands/configure/channel/remove.py +30 -0
  63. aiq/cli/commands/configure/channel/update.py +30 -0
  64. aiq/cli/commands/configure/configure.py +33 -0
  65. aiq/cli/commands/evaluate.py +139 -0
  66. aiq/cli/commands/info/__init__.py +14 -0
  67. aiq/cli/commands/info/info.py +39 -0
  68. aiq/cli/commands/info/list_channels.py +32 -0
  69. aiq/cli/commands/info/list_components.py +129 -0
  70. aiq/cli/commands/info/list_mcp.py +213 -0
  71. aiq/cli/commands/registry/__init__.py +14 -0
  72. aiq/cli/commands/registry/publish.py +88 -0
  73. aiq/cli/commands/registry/pull.py +118 -0
  74. aiq/cli/commands/registry/registry.py +38 -0
  75. aiq/cli/commands/registry/remove.py +108 -0
  76. aiq/cli/commands/registry/search.py +155 -0
  77. aiq/cli/commands/sizing/__init__.py +14 -0
  78. aiq/cli/commands/sizing/calc.py +297 -0
  79. aiq/cli/commands/sizing/sizing.py +27 -0
  80. aiq/cli/commands/start.py +246 -0
  81. aiq/cli/commands/uninstall.py +81 -0
  82. aiq/cli/commands/validate.py +47 -0
  83. aiq/cli/commands/workflow/__init__.py +14 -0
  84. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  85. aiq/cli/commands/workflow/templates/config.yml.j2 +16 -0
  86. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
  87. aiq/cli/commands/workflow/templates/register.py.j2 +5 -0
  88. aiq/cli/commands/workflow/templates/workflow.py.j2 +36 -0
  89. aiq/cli/commands/workflow/workflow.py +37 -0
  90. aiq/cli/commands/workflow/workflow_commands.py +313 -0
  91. aiq/cli/entrypoint.py +135 -0
  92. aiq/cli/main.py +44 -0
  93. aiq/cli/register_workflow.py +488 -0
  94. aiq/cli/type_registry.py +1000 -0
  95. aiq/data_models/__init__.py +14 -0
  96. aiq/data_models/api_server.py +694 -0
  97. aiq/data_models/authentication.py +231 -0
  98. aiq/data_models/common.py +171 -0
  99. aiq/data_models/component.py +54 -0
  100. aiq/data_models/component_ref.py +168 -0
  101. aiq/data_models/config.py +406 -0
  102. aiq/data_models/dataset_handler.py +123 -0
  103. aiq/data_models/discovery_metadata.py +335 -0
  104. aiq/data_models/embedder.py +27 -0
  105. aiq/data_models/evaluate.py +127 -0
  106. aiq/data_models/evaluator.py +26 -0
  107. aiq/data_models/front_end.py +26 -0
  108. aiq/data_models/function.py +30 -0
  109. aiq/data_models/function_dependencies.py +72 -0
  110. aiq/data_models/interactive.py +246 -0
  111. aiq/data_models/intermediate_step.py +302 -0
  112. aiq/data_models/invocation_node.py +38 -0
  113. aiq/data_models/llm.py +27 -0
  114. aiq/data_models/logging.py +26 -0
  115. aiq/data_models/memory.py +27 -0
  116. aiq/data_models/object_store.py +44 -0
  117. aiq/data_models/profiler.py +54 -0
  118. aiq/data_models/registry_handler.py +26 -0
  119. aiq/data_models/retriever.py +30 -0
  120. aiq/data_models/retry_mixin.py +35 -0
  121. aiq/data_models/span.py +187 -0
  122. aiq/data_models/step_adaptor.py +64 -0
  123. aiq/data_models/streaming.py +33 -0
  124. aiq/data_models/swe_bench_model.py +54 -0
  125. aiq/data_models/telemetry_exporter.py +26 -0
  126. aiq/data_models/ttc_strategy.py +30 -0
  127. aiq/embedder/__init__.py +0 -0
  128. aiq/embedder/langchain_client.py +41 -0
  129. aiq/embedder/nim_embedder.py +59 -0
  130. aiq/embedder/openai_embedder.py +43 -0
  131. aiq/embedder/register.py +24 -0
  132. aiq/eval/__init__.py +14 -0
  133. aiq/eval/config.py +60 -0
  134. aiq/eval/dataset_handler/__init__.py +0 -0
  135. aiq/eval/dataset_handler/dataset_downloader.py +106 -0
  136. aiq/eval/dataset_handler/dataset_filter.py +52 -0
  137. aiq/eval/dataset_handler/dataset_handler.py +254 -0
  138. aiq/eval/evaluate.py +506 -0
  139. aiq/eval/evaluator/__init__.py +14 -0
  140. aiq/eval/evaluator/base_evaluator.py +73 -0
  141. aiq/eval/evaluator/evaluator_model.py +45 -0
  142. aiq/eval/intermediate_step_adapter.py +99 -0
  143. aiq/eval/rag_evaluator/__init__.py +0 -0
  144. aiq/eval/rag_evaluator/evaluate.py +178 -0
  145. aiq/eval/rag_evaluator/register.py +143 -0
  146. aiq/eval/register.py +23 -0
  147. aiq/eval/remote_workflow.py +133 -0
  148. aiq/eval/runners/__init__.py +14 -0
  149. aiq/eval/runners/config.py +39 -0
  150. aiq/eval/runners/multi_eval_runner.py +54 -0
  151. aiq/eval/runtime_event_subscriber.py +52 -0
  152. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  153. aiq/eval/swe_bench_evaluator/evaluate.py +215 -0
  154. aiq/eval/swe_bench_evaluator/register.py +36 -0
  155. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  156. aiq/eval/trajectory_evaluator/evaluate.py +75 -0
  157. aiq/eval/trajectory_evaluator/register.py +40 -0
  158. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  159. aiq/eval/tunable_rag_evaluator/evaluate.py +245 -0
  160. aiq/eval/tunable_rag_evaluator/register.py +52 -0
  161. aiq/eval/usage_stats.py +41 -0
  162. aiq/eval/utils/__init__.py +0 -0
  163. aiq/eval/utils/output_uploader.py +140 -0
  164. aiq/eval/utils/tqdm_position_registry.py +40 -0
  165. aiq/eval/utils/weave_eval.py +184 -0
  166. aiq/experimental/__init__.py +0 -0
  167. aiq/experimental/decorators/__init__.py +0 -0
  168. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  169. aiq/experimental/test_time_compute/__init__.py +0 -0
  170. aiq/experimental/test_time_compute/editing/__init__.py +0 -0
  171. aiq/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
  172. aiq/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
  173. aiq/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
  174. aiq/experimental/test_time_compute/functions/__init__.py +0 -0
  175. aiq/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
  176. aiq/experimental/test_time_compute/functions/its_tool_orchestration_function.py +205 -0
  177. aiq/experimental/test_time_compute/functions/its_tool_wrapper_function.py +146 -0
  178. aiq/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
  179. aiq/experimental/test_time_compute/models/__init__.py +0 -0
  180. aiq/experimental/test_time_compute/models/editor_config.py +132 -0
  181. aiq/experimental/test_time_compute/models/scoring_config.py +112 -0
  182. aiq/experimental/test_time_compute/models/search_config.py +120 -0
  183. aiq/experimental/test_time_compute/models/selection_config.py +154 -0
  184. aiq/experimental/test_time_compute/models/stage_enums.py +43 -0
  185. aiq/experimental/test_time_compute/models/strategy_base.py +66 -0
  186. aiq/experimental/test_time_compute/models/tool_use_config.py +41 -0
  187. aiq/experimental/test_time_compute/models/ttc_item.py +48 -0
  188. aiq/experimental/test_time_compute/register.py +36 -0
  189. aiq/experimental/test_time_compute/scoring/__init__.py +0 -0
  190. aiq/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
  191. aiq/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
  192. aiq/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
  193. aiq/experimental/test_time_compute/search/__init__.py +0 -0
  194. aiq/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
  195. aiq/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
  196. aiq/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
  197. aiq/experimental/test_time_compute/selection/__init__.py +0 -0
  198. aiq/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
  199. aiq/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
  200. aiq/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
  201. aiq/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
  202. aiq/experimental/test_time_compute/selection/threshold_selector.py +58 -0
  203. aiq/front_ends/__init__.py +14 -0
  204. aiq/front_ends/console/__init__.py +14 -0
  205. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  206. aiq/front_ends/console/console_front_end_config.py +32 -0
  207. aiq/front_ends/console/console_front_end_plugin.py +96 -0
  208. aiq/front_ends/console/register.py +25 -0
  209. aiq/front_ends/cron/__init__.py +14 -0
  210. aiq/front_ends/fastapi/__init__.py +14 -0
  211. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  212. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  213. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  214. aiq/front_ends/fastapi/fastapi_front_end_config.py +234 -0
  215. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  216. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
  217. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1092 -0
  218. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  219. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  220. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
  221. aiq/front_ends/fastapi/job_store.py +183 -0
  222. aiq/front_ends/fastapi/main.py +72 -0
  223. aiq/front_ends/fastapi/message_handler.py +298 -0
  224. aiq/front_ends/fastapi/message_validator.py +345 -0
  225. aiq/front_ends/fastapi/register.py +25 -0
  226. aiq/front_ends/fastapi/response_helpers.py +195 -0
  227. aiq/front_ends/fastapi/step_adaptor.py +321 -0
  228. aiq/front_ends/mcp/__init__.py +14 -0
  229. aiq/front_ends/mcp/mcp_front_end_config.py +32 -0
  230. aiq/front_ends/mcp/mcp_front_end_plugin.py +93 -0
  231. aiq/front_ends/mcp/register.py +27 -0
  232. aiq/front_ends/mcp/tool_converter.py +242 -0
  233. aiq/front_ends/register.py +22 -0
  234. aiq/front_ends/simple_base/__init__.py +14 -0
  235. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
  236. aiq/llm/__init__.py +0 -0
  237. aiq/llm/aws_bedrock_llm.py +57 -0
  238. aiq/llm/nim_llm.py +46 -0
  239. aiq/llm/openai_llm.py +46 -0
  240. aiq/llm/register.py +23 -0
  241. aiq/llm/utils/__init__.py +14 -0
  242. aiq/llm/utils/env_config_value.py +94 -0
  243. aiq/llm/utils/error.py +17 -0
  244. aiq/memory/__init__.py +20 -0
  245. aiq/memory/interfaces.py +183 -0
  246. aiq/memory/models.py +112 -0
  247. aiq/meta/module_to_distro.json +3 -0
  248. aiq/meta/pypi.md +58 -0
  249. aiq/object_store/__init__.py +20 -0
  250. aiq/object_store/in_memory_object_store.py +76 -0
  251. aiq/object_store/interfaces.py +84 -0
  252. aiq/object_store/models.py +36 -0
  253. aiq/object_store/register.py +20 -0
  254. aiq/observability/__init__.py +14 -0
  255. aiq/observability/exporter/__init__.py +14 -0
  256. aiq/observability/exporter/base_exporter.py +449 -0
  257. aiq/observability/exporter/exporter.py +78 -0
  258. aiq/observability/exporter/file_exporter.py +33 -0
  259. aiq/observability/exporter/processing_exporter.py +322 -0
  260. aiq/observability/exporter/raw_exporter.py +52 -0
  261. aiq/observability/exporter/span_exporter.py +265 -0
  262. aiq/observability/exporter_manager.py +335 -0
  263. aiq/observability/mixin/__init__.py +14 -0
  264. aiq/observability/mixin/batch_config_mixin.py +26 -0
  265. aiq/observability/mixin/collector_config_mixin.py +23 -0
  266. aiq/observability/mixin/file_mixin.py +288 -0
  267. aiq/observability/mixin/file_mode.py +23 -0
  268. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  269. aiq/observability/mixin/serialize_mixin.py +61 -0
  270. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  271. aiq/observability/processor/__init__.py +14 -0
  272. aiq/observability/processor/batching_processor.py +310 -0
  273. aiq/observability/processor/callback_processor.py +42 -0
  274. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  275. aiq/observability/processor/processor.py +71 -0
  276. aiq/observability/register.py +96 -0
  277. aiq/observability/utils/__init__.py +14 -0
  278. aiq/observability/utils/dict_utils.py +236 -0
  279. aiq/observability/utils/time_utils.py +31 -0
  280. aiq/plugins/.namespace +1 -0
  281. aiq/profiler/__init__.py +0 -0
  282. aiq/profiler/calc/__init__.py +14 -0
  283. aiq/profiler/calc/calc_runner.py +627 -0
  284. aiq/profiler/calc/calculations.py +288 -0
  285. aiq/profiler/calc/data_models.py +188 -0
  286. aiq/profiler/calc/plot.py +345 -0
  287. aiq/profiler/callbacks/__init__.py +0 -0
  288. aiq/profiler/callbacks/agno_callback_handler.py +295 -0
  289. aiq/profiler/callbacks/base_callback_class.py +20 -0
  290. aiq/profiler/callbacks/langchain_callback_handler.py +290 -0
  291. aiq/profiler/callbacks/llama_index_callback_handler.py +205 -0
  292. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
  293. aiq/profiler/callbacks/token_usage_base_model.py +27 -0
  294. aiq/profiler/data_frame_row.py +51 -0
  295. aiq/profiler/data_models.py +24 -0
  296. aiq/profiler/decorators/__init__.py +0 -0
  297. aiq/profiler/decorators/framework_wrapper.py +131 -0
  298. aiq/profiler/decorators/function_tracking.py +254 -0
  299. aiq/profiler/forecasting/__init__.py +0 -0
  300. aiq/profiler/forecasting/config.py +18 -0
  301. aiq/profiler/forecasting/model_trainer.py +75 -0
  302. aiq/profiler/forecasting/models/__init__.py +22 -0
  303. aiq/profiler/forecasting/models/forecasting_base_model.py +40 -0
  304. aiq/profiler/forecasting/models/linear_model.py +196 -0
  305. aiq/profiler/forecasting/models/random_forest_regressor.py +268 -0
  306. aiq/profiler/inference_metrics_model.py +28 -0
  307. aiq/profiler/inference_optimization/__init__.py +0 -0
  308. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  309. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
  310. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
  311. aiq/profiler/inference_optimization/data_models.py +386 -0
  312. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  313. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
  314. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
  315. aiq/profiler/inference_optimization/llm_metrics.py +212 -0
  316. aiq/profiler/inference_optimization/prompt_caching.py +163 -0
  317. aiq/profiler/inference_optimization/token_uniqueness.py +107 -0
  318. aiq/profiler/inference_optimization/workflow_runtimes.py +72 -0
  319. aiq/profiler/intermediate_property_adapter.py +102 -0
  320. aiq/profiler/profile_runner.py +473 -0
  321. aiq/profiler/utils.py +184 -0
  322. aiq/registry_handlers/__init__.py +0 -0
  323. aiq/registry_handlers/local/__init__.py +0 -0
  324. aiq/registry_handlers/local/local_handler.py +176 -0
  325. aiq/registry_handlers/local/register_local.py +37 -0
  326. aiq/registry_handlers/metadata_factory.py +60 -0
  327. aiq/registry_handlers/package_utils.py +567 -0
  328. aiq/registry_handlers/pypi/__init__.py +0 -0
  329. aiq/registry_handlers/pypi/pypi_handler.py +251 -0
  330. aiq/registry_handlers/pypi/register_pypi.py +40 -0
  331. aiq/registry_handlers/register.py +21 -0
  332. aiq/registry_handlers/registry_handler_base.py +157 -0
  333. aiq/registry_handlers/rest/__init__.py +0 -0
  334. aiq/registry_handlers/rest/register_rest.py +56 -0
  335. aiq/registry_handlers/rest/rest_handler.py +237 -0
  336. aiq/registry_handlers/schemas/__init__.py +0 -0
  337. aiq/registry_handlers/schemas/headers.py +42 -0
  338. aiq/registry_handlers/schemas/package.py +68 -0
  339. aiq/registry_handlers/schemas/publish.py +63 -0
  340. aiq/registry_handlers/schemas/pull.py +82 -0
  341. aiq/registry_handlers/schemas/remove.py +36 -0
  342. aiq/registry_handlers/schemas/search.py +91 -0
  343. aiq/registry_handlers/schemas/status.py +47 -0
  344. aiq/retriever/__init__.py +0 -0
  345. aiq/retriever/interface.py +37 -0
  346. aiq/retriever/milvus/__init__.py +14 -0
  347. aiq/retriever/milvus/register.py +81 -0
  348. aiq/retriever/milvus/retriever.py +228 -0
  349. aiq/retriever/models.py +74 -0
  350. aiq/retriever/nemo_retriever/__init__.py +14 -0
  351. aiq/retriever/nemo_retriever/register.py +60 -0
  352. aiq/retriever/nemo_retriever/retriever.py +190 -0
  353. aiq/retriever/register.py +22 -0
  354. aiq/runtime/__init__.py +14 -0
  355. aiq/runtime/loader.py +215 -0
  356. aiq/runtime/runner.py +190 -0
  357. aiq/runtime/session.py +158 -0
  358. aiq/runtime/user_metadata.py +130 -0
  359. aiq/settings/__init__.py +0 -0
  360. aiq/settings/global_settings.py +318 -0
  361. aiq/test/.namespace +1 -0
  362. aiq/tool/__init__.py +0 -0
  363. aiq/tool/chat_completion.py +74 -0
  364. aiq/tool/code_execution/README.md +151 -0
  365. aiq/tool/code_execution/__init__.py +0 -0
  366. aiq/tool/code_execution/code_sandbox.py +267 -0
  367. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  368. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
  369. aiq/tool/code_execution/local_sandbox/__init__.py +13 -0
  370. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
  371. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
  372. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
  373. aiq/tool/code_execution/register.py +74 -0
  374. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  375. aiq/tool/code_execution/utils.py +100 -0
  376. aiq/tool/datetime_tools.py +42 -0
  377. aiq/tool/document_search.py +141 -0
  378. aiq/tool/github_tools/__init__.py +0 -0
  379. aiq/tool/github_tools/create_github_commit.py +133 -0
  380. aiq/tool/github_tools/create_github_issue.py +87 -0
  381. aiq/tool/github_tools/create_github_pr.py +106 -0
  382. aiq/tool/github_tools/get_github_file.py +106 -0
  383. aiq/tool/github_tools/get_github_issue.py +166 -0
  384. aiq/tool/github_tools/get_github_pr.py +256 -0
  385. aiq/tool/github_tools/update_github_issue.py +100 -0
  386. aiq/tool/mcp/__init__.py +14 -0
  387. aiq/tool/mcp/exceptions.py +142 -0
  388. aiq/tool/mcp/mcp_client.py +255 -0
  389. aiq/tool/mcp/mcp_tool.py +96 -0
  390. aiq/tool/memory_tools/__init__.py +0 -0
  391. aiq/tool/memory_tools/add_memory_tool.py +79 -0
  392. aiq/tool/memory_tools/delete_memory_tool.py +67 -0
  393. aiq/tool/memory_tools/get_memory_tool.py +72 -0
  394. aiq/tool/nvidia_rag.py +95 -0
  395. aiq/tool/register.py +38 -0
  396. aiq/tool/retriever.py +89 -0
  397. aiq/tool/server_tools.py +66 -0
  398. aiq/utils/__init__.py +0 -0
  399. aiq/utils/data_models/__init__.py +0 -0
  400. aiq/utils/data_models/schema_validator.py +58 -0
  401. aiq/utils/debugging_utils.py +43 -0
  402. aiq/utils/dump_distro_mapping.py +32 -0
  403. aiq/utils/exception_handlers/__init__.py +0 -0
  404. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  405. aiq/utils/exception_handlers/mcp.py +211 -0
  406. aiq/utils/exception_handlers/schemas.py +114 -0
  407. aiq/utils/io/__init__.py +0 -0
  408. aiq/utils/io/model_processing.py +28 -0
  409. aiq/utils/io/yaml_tools.py +119 -0
  410. aiq/utils/log_utils.py +37 -0
  411. aiq/utils/metadata_utils.py +74 -0
  412. aiq/utils/optional_imports.py +142 -0
  413. aiq/utils/producer_consumer_queue.py +178 -0
  414. aiq/utils/reactive/__init__.py +0 -0
  415. aiq/utils/reactive/base/__init__.py +0 -0
  416. aiq/utils/reactive/base/observable_base.py +65 -0
  417. aiq/utils/reactive/base/observer_base.py +55 -0
  418. aiq/utils/reactive/base/subject_base.py +79 -0
  419. aiq/utils/reactive/observable.py +59 -0
  420. aiq/utils/reactive/observer.py +76 -0
  421. aiq/utils/reactive/subject.py +131 -0
  422. aiq/utils/reactive/subscription.py +49 -0
  423. aiq/utils/settings/__init__.py +0 -0
  424. aiq/utils/settings/global_settings.py +197 -0
  425. aiq/utils/string_utils.py +38 -0
  426. aiq/utils/type_converter.py +290 -0
  427. aiq/utils/type_utils.py +484 -0
  428. aiq/utils/url_utils.py +27 -0
  429. nvidia_nat-1.2.0rc5.dist-info/METADATA +363 -0
  430. nvidia_nat-1.2.0rc5.dist-info/RECORD +435 -0
  431. nvidia_nat-1.2.0rc5.dist-info/WHEEL +5 -0
  432. nvidia_nat-1.2.0rc5.dist-info/entry_points.txt +20 -0
  433. nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE-3rd-party.txt +3686 -0
  434. nvidia_nat-1.2.0rc5.dist-info/licenses/LICENSE.md +201 -0
  435. nvidia_nat-1.2.0rc5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,627 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import copy
17
+ import logging
18
+ import shutil
19
+ import time
20
+ import uuid
21
+ from pathlib import Path
22
+
23
+ from pydantic import ValidationError
24
+
25
+ from aiq.eval.config import EvaluationRunConfig
26
+ from aiq.eval.runners.config import MultiEvaluationRunConfig
27
+ from aiq.eval.runners.multi_eval_runner import MultiEvaluationRunner
28
+ from aiq.profiler.calc.calculations import LinearFitResult
29
+ from aiq.profiler.calc.calculations import calc_gpu_estimate_based_on_slope
30
+ from aiq.profiler.calc.calculations import calc_gpu_estimate_for_single_concurrency
31
+ from aiq.profiler.calc.calculations import compute_slope
32
+ from aiq.profiler.calc.data_models import CalcAlerts
33
+ from aiq.profiler.calc.data_models import CalcData
34
+ from aiq.profiler.calc.data_models import CalcRunnerConfig
35
+ from aiq.profiler.calc.data_models import CalcRunnerOutput
36
+ from aiq.profiler.calc.data_models import FitConfig
37
+ from aiq.profiler.calc.data_models import FitResults
38
+ from aiq.profiler.calc.data_models import GPUEstimates
39
+ from aiq.profiler.calc.data_models import SizingMetricPerItem
40
+ from aiq.profiler.calc.data_models import SizingMetrics
41
+ from aiq.profiler.calc.data_models import SizingMetricsAlerts
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
class LinearFitAnalyzer:
    """Fits concurrency-vs-time trends and reports which points were dropped as outliers.

    The two most recent fit results are kept on the instance as ``llm_latency_fit`` and
    ``wf_runtime_fit``; each is ``None`` until a fit succeeds, and is set back to ``None``
    when ``compute_slope`` raises ``ValueError``.
    """

    def __init__(self, fit_config: FitConfig):
        # Passed through unchanged to every compute_slope() call.
        self.fit_config = fit_config
        self.llm_latency_fit: LinearFitResult | None = None
        self.wf_runtime_fit: LinearFitResult | None = None

    def analyze_metrics(self, sizing_metrics_per_concurrency: dict[int, SizingMetrics]) -> dict[int, CalcAlerts]:
        """
        Fit the p95 LLM latency and p95 workflow runtime against concurrency and
        build one alert record per concurrency.

        Args:
            sizing_metrics_per_concurrency: Measured sizing metrics keyed by concurrency.

        Returns:
            dict[int, CalcAlerts]: Alerts per concurrency. The outlier flags are set for
            concurrencies listed in the corresponding fit's ``outliers_removed``.
        """
        # A line cannot be fitted through fewer than two points, so hand back
        # default (empty) alerts for whatever concurrencies were supplied.
        if len(sizing_metrics_per_concurrency) < 2:
            logger.warning("Need at least 2 concurrencies for linear analysis")
            return {level: CalcAlerts() for level in sizing_metrics_per_concurrency}

        levels = list(sizing_metrics_per_concurrency)

        # LLM latency vs. concurrency
        p95_latencies = [metrics.llm_latency_p95 for metrics in sizing_metrics_per_concurrency.values()]
        try:
            self.llm_latency_fit = compute_slope(levels, p95_latencies, self.fit_config)
            logger.info("Computed latency fit: slope=%.4f, R²=%.3f",
                        self.llm_latency_fit.slope,
                        self.llm_latency_fit.r_squared)
        except ValueError as e:
            logger.warning("Failed to compute latency fit: %s", e)
            self.llm_latency_fit = None

        # Workflow runtime vs. concurrency
        p95_runtimes = [metrics.workflow_runtime_p95 for metrics in sizing_metrics_per_concurrency.values()]
        try:
            self.wf_runtime_fit = compute_slope(levels, p95_runtimes, self.fit_config)
            logger.info("Computed runtime fit: slope=%.4f, R²=%.3f",
                        self.wf_runtime_fit.slope,
                        self.wf_runtime_fit.r_squared)
        except ValueError as e:
            logger.warning("Failed to compute runtime fit: %s", e)
            self.wf_runtime_fit = None

        # Translate the outliers reported by each available fit into per-concurrency flags.
        latency_fit = self.llm_latency_fit
        runtime_fit = self.wf_runtime_fit
        alerts_by_level = {}
        for level in sizing_metrics_per_concurrency:
            flags = CalcAlerts()
            if latency_fit and level in latency_fit.outliers_removed:
                flags.outlier_llm_latency = True
            if runtime_fit and level in runtime_fit.outliers_removed:
                flags.outlier_workflow_runtime = True
            alerts_by_level[level] = flags

        return alerts_by_level
108
+
109
+
110
+ class CalcRunner:
111
+ """
112
+ Calculator for GPU sizing based on concurrency vs. time metrics.
113
+ """
114
+
115
def __init__(self, config: CalcRunnerConfig):
    """
    Store the runner configuration, create empty result containers and validate the config.

    Raises:
        ValueError: Propagated from ``validate_config`` when the configuration is invalid.
    """
    self.config = config

    # Input side: sizing metrics gathered from the evaluation runs, keyed by concurrency.
    # These feed the GPU estimates and alerts below.
    self.metrics_per_concurrency: dict[int, SizingMetrics] = {}

    self.valid_concurrencies: list = []

    # Output side: GPU estimates and alerts, keyed by concurrency.
    self.gpu_estimates_per_concurrency: dict[int, GPUEstimates] = {}
    self.alerts_per_concurrency: dict[int, CalcAlerts] = {}

    # Performs the linear fits used for outlier detection and trend analysis.
    self.linear_analyzer = LinearFitAnalyzer(self.config.fit_config)

    # Fail fast on an unusable configuration.
    self.validate_config()
136
+
137
def validate_config(self) -> None:
    """
    Check the runner configuration and reject settings that cannot be used.

    The concurrency list is checked first: at least two entries, no duplicates and
    every value greater than 0. Online mode then only requires a config file; a
    missing SLA target, test GPU count or target user count is logged as a warning.
    Offline mode requires a positive target for LLM latency or workflow runtime, a
    positive test GPU count, a positive target user count, ``append_job`` disabled
    and an output directory.

    Raises:
        ValueError: If any mandatory check fails.
    """
    cfg = self.config
    levels = cfg.concurrencies

    # At least two concurrencies are needed to estimate the GPU count.
    if len(levels) < 2:
        raise ValueError("Atleast two concurrencies are needed to estimate the GPU count.")

    # Every concurrency may appear only once.
    if len(levels) != len(set(levels)):
        raise ValueError("Concurrencies list contains duplicate values.")

    # Every concurrency has to be greater than 0.
    if any(level <= 0 for level in levels):
        raise ValueError("Concurrencies list contains values less than or equal to 0.")

    if not cfg.offline_mode:
        # Online mode: only the config file is mandatory, everything else is advisory.
        if not cfg.config_file:
            raise ValueError("Config file is required in online mode.")
        if self.target_llm_latency <= 0 and self.target_wf_runtime <= 0:
            logger.warning("Both target_llm_latency and target_workflow_runtime are 0. "
                           "No SLA will be enforced.")
        if self.test_gpu_count <= 0:
            logger.warning("Test GPU count is 0. Tests will be run but the GPU count will not be estimated.")
        if self.target_users <= 0:
            logger.warning("Target users is 0. Tests will be run but the GPU count will not be estimated.")
        return

    # Offline mode: the target test parameters are needed to estimate the GPU count.
    if self.target_llm_latency <= 0 and self.target_wf_runtime <= 0:
        raise ValueError("Both target_llm_latency and target_workflow_runtime are 0. "
                         "Cannot estimate the GPU count in offline mode.")
    if self.test_gpu_count <= 0:
        raise ValueError("Test GPU count is 0. Cannot estimate the GPU count in offline mode.")
    if self.target_users <= 0:
        raise ValueError("Target users is 0. Cannot estimate the GPU count in offline mode.")
    if self.append_job:
        raise ValueError("Appending jobs is not supported in offline mode.")
    if not cfg.output_dir:
        raise ValueError("Output directory is required in offline mode.")
178
+
179
@property
def target_llm_latency(self) -> float:
    """Target p95 LLM latency, read from ``config.target_llm_latency_p95``."""
    return self.config.target_llm_latency_p95
182
+
183
@property
def target_wf_runtime(self) -> float:
    """Target p95 workflow runtime, read from ``config.target_workflow_runtime_p95``."""
    return self.config.target_workflow_runtime_p95
186
+
187
@property
def target_users(self) -> int:
    """Target number of users, read from ``config.target_users``."""
    return self.config.target_users
190
+
191
@property
def test_gpu_count(self) -> int:
    """GPU count of the test setup, read from ``config.test_gpu_count``."""
    return self.config.test_gpu_count
194
+
195
@property
def append_job(self) -> bool:
    """Value of ``config.append_job``; ``validate_config`` rejects it in offline mode."""
    return self.config.append_job
198
+
199
@property
def output_dir(self) -> Path:
    """Return the output directory configured in ``config.output_dir``."""
    cfg = self.config
    return cfg.output_dir
202
+
203
def _calc_gpu_estimates_based_on_slope(self,
                                       sizing_metrics_per_concurrency: dict[int, SizingMetrics],
                                       use_latency: bool,
                                       use_runtime: bool) -> GPUEstimates:
    """
    Derive the overall GPU estimates from the linear fits held by ``self.linear_analyzer``.

    An estimate is produced for a metric only when its ``use_*`` flag is set and the analyzer
    holds a fit for that metric; otherwise the corresponding estimate stays ``None``.

    Args:
        sizing_metrics_per_concurrency: Not read by this method; kept as part of the signature.
        use_latency: Whether to estimate from the LLM latency fit.
        use_runtime: Whether to estimate from the workflow runtime fit.

    Returns:
        GPUEstimates holding the runtime-based and the latency-based estimate.
    """
    mode_label = "offline" if self.config.offline_mode else "online"
    analyzer = self.linear_analyzer

    runtime_estimate = None
    if use_runtime:
        runtime_fit = analyzer.wf_runtime_fit
        if runtime_fit:
            runtime_estimate = calc_gpu_estimate_based_on_slope(target_time_metric=self.target_wf_runtime,
                                                                target_users=self.target_users,
                                                                test_gpu_count=self.test_gpu_count,
                                                                observed_slope=runtime_fit.slope,
                                                                observed_intercept=runtime_fit.intercept)
            logger.info(
                "[GPU Estimation %s] Runtime slope=%.4f, intercept=%.4f, R²=%.3f, outliers_removed=%s, estimate=%.2f",
                mode_label,
                runtime_fit.slope,
                runtime_fit.intercept,
                runtime_fit.r_squared,
                runtime_fit.outliers_removed,
                runtime_estimate)

    latency_estimate = None
    if use_latency:
        latency_fit = analyzer.llm_latency_fit
        if latency_fit:
            latency_estimate = calc_gpu_estimate_based_on_slope(target_time_metric=self.target_llm_latency,
                                                                target_users=self.target_users,
                                                                test_gpu_count=self.test_gpu_count,
                                                                observed_slope=latency_fit.slope,
                                                                observed_intercept=latency_fit.intercept)
            logger.info(
                "[GPU Estimation %s] Latency slope=%.4f, intercept=%.4f, R²=%.3f, outliers_removed=%s, estimate=%.2f",
                mode_label,
                latency_fit.slope,
                latency_fit.intercept,
                latency_fit.r_squared,
                latency_fit.outliers_removed,
                latency_estimate)

    return GPUEstimates(gpu_estimate_by_wf_runtime=runtime_estimate, gpu_estimate_by_llm_latency=latency_estimate)
247
+
248
def _calc_gpu_estimates_per_concurrency(self, sizing_metrics_per_concurrency: dict[int, SizingMetrics]):
    """
    Calculate rough per-concurrency GPU estimates and record target violations in the alerts.

    For every concurrency the result of ``calc_gpu_estimate_for_single_concurrency`` is stored in
    ``self.gpu_estimates_per_concurrency``, and the number of items whose LLM latency / workflow
    runtime exceeded the targets is written into ``self.alerts_per_concurrency``.

    Args:
        sizing_metrics_per_concurrency: Sizing metrics keyed by the concurrency they were measured at.
    """
    # A target of 0 (or less) means that metric is not used.
    use_latency = self.target_llm_latency > 0
    use_runtime = self.target_wf_runtime > 0

    logger.info("Calculating per-concurrency metrics for %d concurrencies", len(sizing_metrics_per_concurrency))
    logger.info("Target users: %d, Test GPU count: %d", self.target_users, self.test_gpu_count)
    logger.info("Using targets - Latency: %s, Runtime: %s",
                "Yes" if use_latency else "No",
                "Yes" if use_runtime else "No")

    for concurrency, metrics_per_concurrency in sizing_metrics_per_concurrency.items():
        observed_latency = metrics_per_concurrency.llm_latency_p95
        observed_runtime = metrics_per_concurrency.workflow_runtime_p95

        # Get ROUGH GPU estimates per concurrency. This is not used for the final GPU estimation.
        # It is only available for information purposes.
        gpu_estimates = calc_gpu_estimate_for_single_concurrency(target_llm_latency=self.target_llm_latency,
                                                                 target_workflow_runtime=self.target_wf_runtime,
                                                                 target_users=self.target_users,
                                                                 test_concurrency=concurrency,
                                                                 test_gpu_count=self.test_gpu_count,
                                                                 observed_latency=observed_latency,
                                                                 observed_runtime=observed_runtime)
        self.gpu_estimates_per_concurrency[concurrency] = gpu_estimates

        # Count out-of-range items based on per-item metrics (only if targets are specified)
        num_items_greater_than_target_latency = 0
        num_items_greater_than_target_runtime = 0

        if (use_latency or use_runtime) and metrics_per_concurrency.per_item_metrics:
            for item_metrics in metrics_per_concurrency.per_item_metrics.values():
                if use_latency and item_metrics.llm_latency > self.target_llm_latency:
                    num_items_greater_than_target_latency += 1
                if use_runtime and item_metrics.workflow_runtime > self.target_wf_runtime:
                    num_items_greater_than_target_runtime += 1
        else:
            logger.debug("Skipping per-item processing for concurrency %d (no targets or no per-item data)",
                         concurrency)

        # Update the alerts (creating them if the linear analysis produced none) with the out-of-range data
        existing_alerts = self.alerts_per_concurrency.get(concurrency, CalcAlerts())
        existing_alerts.num_items_greater_than_target_latency = num_items_greater_than_target_latency
        existing_alerts.num_items_greater_than_target_runtime = num_items_greater_than_target_runtime
        self.alerts_per_concurrency[concurrency] = existing_alerts

        # The runtime-based estimate is optional elsewhere in this file (checked with `is not None`),
        # so it must not be formatted with %.2f unconditionally: None would break the log record.
        runtime_estimate = gpu_estimates.gpu_estimate_by_wf_runtime
        runtime_estimate_text = "n/a" if runtime_estimate is None else f"{runtime_estimate:.2f}"
        logger.debug("Concurrency %d: GPU estimate=%s, out-of-range items=%d",
                     concurrency,
                     runtime_estimate_text,
                     num_items_greater_than_target_latency + num_items_greater_than_target_runtime)

    logger.info("Completed per-concurrency calculations:")
    logger.info(" - GPU estimates calculated for %d concurrencies", len(self.gpu_estimates_per_concurrency))
303
+
304
+ def _validate_gpu_estimation_parameters(self, use_latency: bool, use_runtime: bool) -> bool:
305
+ """Validate parameters required for GPU estimation."""
306
+ if self.target_users <= 0:
307
+ logger.warning("Target users must be greater than 0 for GPU estimation")
308
+ return False
309
+
310
+ if self.test_gpu_count <= 0:
311
+ logger.warning("Test GPU count must be greater than 0 for GPU estimation")
312
+ return False
313
+
314
+ if not use_latency and not use_runtime:
315
+ logger.warning("No targets time metrics specified")
316
+ return False
317
+
318
+ return True
319
+
320
+ def _validate_metrics_data(self, sizing_metrics_per_concurrency: dict) -> dict:
321
+ """Validate and filter metrics data."""
322
+ valid_metrics = {}
323
+ for concurrency, metrics in sizing_metrics_per_concurrency.items():
324
+ if not metrics or not metrics.llm_latency_p95 or not metrics.workflow_runtime_p95:
325
+ logger.warning("Invalid metrics for concurrency %d: missing required fields", concurrency)
326
+ continue
327
+ valid_metrics[concurrency] = metrics
328
+ return valid_metrics
329
+
330
def _calc_fit_and_gpu_estimate(self, sizing_metrics_per_concurrency: dict[int, SizingMetrics]) -> GPUEstimates:
    """
    Estimate GPU count to meet target latency and/or workflow runtime SLA
    for a given target user load.

    Side effects on ``self``:
        - ``valid_concurrencies``: concurrencies with usable metrics that were not interrupted
        - ``alerts_per_concurrency``: alerts produced by the linear analysis
        - per-concurrency (rough) GPU estimates, via ``_calc_gpu_estimates_per_concurrency``

    Args:
        sizing_metrics_per_concurrency: Sizing metrics keyed by concurrency.

    Returns:
        GPU estimates based on the slope of time vs. concurrency. An empty ``GPUEstimates`` is
        returned when there are no usable runs or the estimation parameters are not valid.
    """
    gpu_estimates = GPUEstimates()

    # Filter out concurrencies that are missing required metrics
    valid_metrics = self._validate_metrics_data(sizing_metrics_per_concurrency)
    if not valid_metrics:
        logger.warning("No valid metrics found for metrics calculation")
        return gpu_estimates

    # Filter out concurrencies that were interrupted
    valid_runs = {
        concurrency: metrics
        for concurrency, metrics in valid_metrics.items() if not metrics.alerts.workflow_interrupted
    }
    if not valid_runs:
        logger.warning("No valid runs found for slope-based estimation")
        return gpu_estimates

    self.valid_concurrencies = valid_runs.keys()

    # Perform linear analysis on valid runs, this is done even if GPU estimation is skipped
    self.alerts_per_concurrency = self.linear_analyzer.analyze_metrics(valid_runs)

    # Validate GPU estimation parameters; a target of 0 (or less) means that metric is not used
    use_latency = self.target_llm_latency > 0
    use_runtime = self.target_wf_runtime > 0
    if not self._validate_gpu_estimation_parameters(use_latency, use_runtime):
        return gpu_estimates

    # Report the runs that actually feed the estimation (interrupted runs were dropped above)
    logger.info("Starting GPU estimation with %d concurrencies", len(valid_runs))
    logger.info("Target users: %d, Test GPU count: %d", self.target_users, self.test_gpu_count)
    logger.info("Target latency: %.3fs, Target runtime: %.3fs",
                self.target_llm_latency if use_latency else 0,
                self.target_wf_runtime if use_runtime else 0)

    # Calculate GPU estimates per-concurrency
    self._calc_gpu_estimates_per_concurrency(valid_runs)

    # Calculate overall gpu estimates using linear fits
    return self._calc_gpu_estimates_based_on_slope(valid_runs, use_latency, use_runtime)
380
+
381
def generate_calc_runner_output(self) -> CalcRunnerOutput:
    """
    Assemble the CalcRunnerOutput from ``self.metrics_per_concurrency``.

    Runs the fit / GPU estimation, then pairs every concurrency's sizing metrics (calculator
    input) with its GPU estimates and alerts (calculator output). Concurrencies without a
    stored estimate or alert get a default ``GPUEstimates()`` / ``CalcAlerts()``.

    Returns:
        The populated CalcRunnerOutput, or an empty one when no metrics are available.
    """
    all_metrics = self.metrics_per_concurrency
    if not all_metrics:
        logger.warning("No metrics per concurrency found. Skipping generation of CalcRunnerOutput.")
        return CalcRunnerOutput()

    logger.info("Building CalcRunnerOutput from %d concurrency metrics", len(all_metrics))

    # Fills the per-concurrency estimates and alerts on self as a side effect
    overall_estimates = self._calc_fit_and_gpu_estimate(all_metrics)

    calc_data = {
        level: CalcData(gpu_estimates=self.gpu_estimates_per_concurrency.get(level, GPUEstimates()),
                        alerts=self.alerts_per_concurrency.get(level, CalcAlerts()),
                        sizing_metrics=sizing_metrics)
        for level, sizing_metrics in all_metrics.items()
    }

    runtime_based = overall_estimates.gpu_estimate_by_wf_runtime
    if runtime_based is not None:
        logger.info("GPU estimate by workflow runtime: %.2f", runtime_based)
    latency_based = overall_estimates.gpu_estimate_by_llm_latency
    if latency_based is not None:
        logger.info("GPU estimate by LLM latency: %.2f", latency_based)

    fits = FitResults(llm_latency_fit=self.linear_analyzer.llm_latency_fit,
                      wf_runtime_fit=self.linear_analyzer.wf_runtime_fit)
    return CalcRunnerOutput(gpu_estimates=overall_estimates, calc_data=calc_data, fit_results=fits)
416
+
417
def plot_concurrency_vs_time_metrics(self, output_dir: Path):
    """
    Plot concurrency vs. time metrics for the valid runs, using the pre-computed fits.

    Plotting is best effort: any exception raised by the plot helper is logged and swallowed.

    Args:
        output_dir: Directory handed to the plot helper.
    """
    from aiq.profiler.calc.plot import plot_concurrency_vs_time_metrics as plot_metrics

    if not self.metrics_per_concurrency:
        logger.warning("No metrics available for plotting")
        return

    # Restrict the plot to the concurrencies accepted by the estimation step
    plottable = {}
    for level, metrics in self.metrics_per_concurrency.items():
        if level in self.valid_concurrencies:
            plottable[level] = metrics

    if not plottable:
        logger.warning("No valid runs available for plotting")
        return

    analyzer = self.linear_analyzer
    try:
        # Either fit may be None
        plot_metrics(metrics_per_concurrency=plottable,
                     output_dir=output_dir,
                     target_llm_latency=self.target_llm_latency,
                     target_runtime=self.target_wf_runtime,
                     llm_latency_fit=analyzer.llm_latency_fit,
                     runtime_fit=analyzer.wf_runtime_fit)
    except Exception as e:
        logger.exception("Failed to plot concurrency vs. time metrics: %s", e, exc_info=True)
        logger.warning("Skipping plot of concurrency vs. time metrics")
447
+
448
def write_output(self, output_dir: Path, calc_runner_output: CalcRunnerOutput):
    """
    Write the calculator output (and optionally the plots) into a job directory.

    The job directory is created under ``<output_dir>/offline`` or ``<output_dir>/online``,
    depending on ``config.offline_mode``. In append mode a new ``job_<uuid>`` directory is added;
    otherwise the existing ``job_*`` directories are removed first and ``job_<unix timestamp>``
    is used.

    Args:
        output_dir: Base output directory (``Path`` or path string). Nothing is written when falsy.
        calc_runner_output: The output to serialize as ``calc_runner_output.json``.
    """
    if not output_dir:
        logger.warning("Output directory is not set. Skipping write.")
        return

    # Accept plain strings too; `str / str` would raise TypeError below.
    output_dir = Path(output_dir)

    mode = "offline" if self.config.offline_mode else "online"
    subdir = output_dir / mode

    if self.append_job:
        job_dir = subdir / f"job_{uuid.uuid4()}"
    else:
        # Clear all previous jobs when not in append mode
        existing_jobs = list(subdir.glob("job_*"))
        if existing_jobs:
            logger.info("Clearing %d existing jobs", len(existing_jobs))
            for job in existing_jobs:
                if job.is_dir():
                    shutil.rmtree(job)
        # Use timestamp-based naming
        job_dir = subdir / f"job_{int(time.time())}"

    job_dir.mkdir(parents=True, exist_ok=True)

    if self.config.plot_data:
        self.plot_concurrency_vs_time_metrics(job_dir)

    output_path = job_dir / "calc_runner_output.json"
    output_path.write_text(calc_runner_output.model_dump_json(indent=2))
    logger.info("Wrote output to %s", job_dir)
480
+
481
def run_offline(self) -> CalcRunnerOutput:
    """
    Run in offline mode.
    1. Read previous jobs in online mode and create sizing metrics per concurrency
    2. Calculate GPU estimates
    3. Write the output to the offline subdirectory

    Returns:
        The calculated CalcRunnerOutput, or an empty one when the online directory does not exist.
    """
    # Read all jobs in online mode and only append unique concurrency values to metrics_per_concurrency
    online_dir = Path(self.config.output_dir) / "online"
    if not online_dir.exists():
        logger.warning("Online directory %s does not exist. Skipping offline mode.", online_dir)
        return CalcRunnerOutput()

    # Get all job directories and sort by modification time (most recent first)
    job_dirs = [job_dir for job_dir in online_dir.iterdir() if job_dir.is_dir() and job_dir.name.startswith("job_")]
    job_dirs.sort(key=lambda x: x.stat().st_mtime, reverse=True)

    logger.info("Found %d job directories, processing from most recent to oldest", len(job_dirs))

    for job_dir in job_dirs:
        calc_runner_output_path = job_dir / "calc_runner_output.json"
        if not calc_runner_output_path.exists():
            logger.warning("Calc runner output file %s does not exist. Skipping job %s.",
                           calc_runner_output_path,
                           job_dir.name)
            continue
        try:
            calc_output = CalcRunnerOutput.model_validate_json(calc_runner_output_path.read_text())
        except ValidationError:
            # logger.exception already attaches the active exception and its traceback;
            # the second placeholder names the job that is skipped.
            logger.exception("Failed to validate calc runner output file %s. Skipping job %s.",
                             calc_runner_output_path,
                             job_dir.name)
            continue

        # Extract sizing metrics from calc_data; jobs are visited newest first, so the first
        # occurrence of a concurrency wins.
        for concurrency, data in calc_output.calc_data.items():
            metrics = data.sizing_metrics
            if concurrency not in self.metrics_per_concurrency:
                logger.info("Adding concurrency %s from job %s (most recent available).", concurrency, job_dir.name)
                logger.info("Sizing metrics: %s", metrics)
                self.metrics_per_concurrency[concurrency] = metrics
            else:
                # Skip since we already have this concurrency from a more recent job
                logger.debug("Concurrency %s already exists from a more recent job. Skipping job %s.",
                             concurrency,
                             job_dir.name)

    # calculate gpu estimates
    calc_runner_output = self.generate_calc_runner_output()

    # write the offline output
    self.write_output(self.config.output_dir, calc_runner_output)

    return calc_runner_output
536
+
537
async def run_online(self) -> CalcRunnerOutput:
    """
    Run in online mode using a MultiEvaluationRunner with one config per concurrency.
    1. Run the workflow
    2. Create sizing metrics per concurrency from the profiler results and usage stats
    3. Calculate GPU estimates
    4. Write the output to the online subdirectory

    Returns:
        The calculated CalcRunnerOutput, or an empty one when the runner produced no outputs.
    """
    base_run_config = EvaluationRunConfig(config_file=self.config.config_file,
                                          adjust_dataset_size=True,
                                          num_passes=self.config.num_passes,
                                          endpoint=self.config.endpoint,
                                          endpoint_timeout=self.config.endpoint_timeout)

    # One deep copy of the base config per concurrency, each overriding the max concurrency and
    # the workflow alias, and forcing the profiler base metrics on.
    per_concurrency_configs = {}
    for concurrency in self.config.concurrencies:
        run_config = copy.deepcopy(base_run_config)
        run_config.override = (
            ("eval.general.max_concurrency", str(concurrency)),
            ("eval.general.workflow_alias", f"wf_concurrency_{concurrency!s}"),
            ("eval.general.profiler.base_metrics", "true"),
        )
        per_concurrency_configs[concurrency] = run_config

    multi_runner = MultiEvaluationRunner(MultiEvaluationRunConfig(configs=per_concurrency_configs))
    evaluation_run_outputs = await multi_runner.run_all()
    if not evaluation_run_outputs:
        logger.warning("No evaluation run outputs found. Skipping online mode.")
        return CalcRunnerOutput()

    # Build the sizing metrics per concurrency. Interrupted workflows are flagged in the alerts
    # so that they can be excluded from the slope-based GPU estimation.
    for concurrency, eval_output in evaluation_run_outputs.items():
        profiler_results = eval_output.profiler_results
        usage_stats = eval_output.usage_stats

        per_item_metrics = {}
        for item_id, item_stats in usage_stats.usage_stats_items.items():
            per_item_metrics[item_id] = SizingMetricPerItem(llm_latency=item_stats.llm_latency,
                                                            workflow_runtime=item_stats.runtime)

        # A missing profiler metric is recorded as 0
        if profiler_results.llm_latency_ci:
            llm_latency_p95 = profiler_results.llm_latency_ci.p95
        else:
            llm_latency_p95 = 0
        if profiler_results.workflow_runtime_metrics:
            workflow_runtime_p95 = profiler_results.workflow_runtime_metrics.p95
        else:
            workflow_runtime_p95 = 0

        interruption_alert = SizingMetricsAlerts(workflow_interrupted=eval_output.workflow_interrupted)
        self.metrics_per_concurrency[concurrency] = SizingMetrics(llm_latency_p95=llm_latency_p95,
                                                                  workflow_runtime_p95=workflow_runtime_p95,
                                                                  total_runtime=usage_stats.total_runtime,
                                                                  per_item_metrics=per_item_metrics,
                                                                  alerts=interruption_alert)

    # Calculate the GPU estimates, then plot the metrics and write the output
    calc_runner_output = self.generate_calc_runner_output()
    self.write_output(self.config.output_dir, calc_runner_output)

    return calc_runner_output
610
+
611
async def run(self) -> CalcRunnerOutput:
    """
    Dispatch to the mode selected by ``config.offline_mode``.

    online mode (awaits ``run_online``):
    1. Run the workflow
    2. Collect profiler results and usage stats
    3. Calculate GPU estimates
    4. Write the output to the online subdirectory

    offline mode (calls ``run_offline``):
    1. Read previous jobs in online mode and only append unique concurrency values to metrics_per_concurrency
    2. Calculate GPU estimates
    3. Write the output to the offline subdirectory
    """
    if not self.config.offline_mode:
        return await self.run_online()
    return self.run_offline()