aiqtoolkit-1.2.0rc4-py3-none-any.whl → aiqtoolkit-1.2rc9-py3-none-any.whl

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.

Potentially problematic release: this version of aiqtoolkit might be problematic.

Files changed (441)
  1. aiqtoolkit-1.2rc9.dist-info/METADATA +29 -0
  2. aiqtoolkit-1.2rc9.dist-info/RECORD +4 -0
  3. aiqtoolkit-1.2rc9.dist-info/top_level.txt +1 -0
  4. aiq/agent/__init__.py +0 -0
  5. aiq/agent/base.py +0 -239
  6. aiq/agent/dual_node.py +0 -67
  7. aiq/agent/react_agent/__init__.py +0 -0
  8. aiq/agent/react_agent/agent.py +0 -355
  9. aiq/agent/react_agent/output_parser.py +0 -104
  10. aiq/agent/react_agent/prompt.py +0 -41
  11. aiq/agent/react_agent/register.py +0 -149
  12. aiq/agent/reasoning_agent/__init__.py +0 -0
  13. aiq/agent/reasoning_agent/reasoning_agent.py +0 -225
  14. aiq/agent/register.py +0 -23
  15. aiq/agent/rewoo_agent/__init__.py +0 -0
  16. aiq/agent/rewoo_agent/agent.py +0 -411
  17. aiq/agent/rewoo_agent/prompt.py +0 -108
  18. aiq/agent/rewoo_agent/register.py +0 -158
  19. aiq/agent/tool_calling_agent/__init__.py +0 -0
  20. aiq/agent/tool_calling_agent/agent.py +0 -119
  21. aiq/agent/tool_calling_agent/register.py +0 -106
  22. aiq/authentication/__init__.py +0 -14
  23. aiq/authentication/api_key/__init__.py +0 -14
  24. aiq/authentication/api_key/api_key_auth_provider.py +0 -96
  25. aiq/authentication/api_key/api_key_auth_provider_config.py +0 -124
  26. aiq/authentication/api_key/register.py +0 -26
  27. aiq/authentication/exceptions/__init__.py +0 -14
  28. aiq/authentication/exceptions/api_key_exceptions.py +0 -38
  29. aiq/authentication/exceptions/auth_code_grant_exceptions.py +0 -86
  30. aiq/authentication/exceptions/call_back_exceptions.py +0 -38
  31. aiq/authentication/exceptions/request_exceptions.py +0 -54
  32. aiq/authentication/http_basic_auth/__init__.py +0 -0
  33. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +0 -81
  34. aiq/authentication/http_basic_auth/register.py +0 -30
  35. aiq/authentication/interfaces.py +0 -93
  36. aiq/authentication/oauth2/__init__.py +0 -14
  37. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +0 -107
  38. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +0 -39
  39. aiq/authentication/oauth2/register.py +0 -25
  40. aiq/authentication/register.py +0 -21
  41. aiq/builder/__init__.py +0 -0
  42. aiq/builder/builder.py +0 -285
  43. aiq/builder/component_utils.py +0 -316
  44. aiq/builder/context.py +0 -264
  45. aiq/builder/embedder.py +0 -24
  46. aiq/builder/eval_builder.py +0 -161
  47. aiq/builder/evaluator.py +0 -29
  48. aiq/builder/framework_enum.py +0 -24
  49. aiq/builder/front_end.py +0 -73
  50. aiq/builder/function.py +0 -344
  51. aiq/builder/function_base.py +0 -380
  52. aiq/builder/function_info.py +0 -627
  53. aiq/builder/intermediate_step_manager.py +0 -174
  54. aiq/builder/llm.py +0 -25
  55. aiq/builder/retriever.py +0 -25
  56. aiq/builder/user_interaction_manager.py +0 -74
  57. aiq/builder/workflow.py +0 -148
  58. aiq/builder/workflow_builder.py +0 -1117
  59. aiq/cli/__init__.py +0 -14
  60. aiq/cli/cli_utils/__init__.py +0 -0
  61. aiq/cli/cli_utils/config_override.py +0 -231
  62. aiq/cli/cli_utils/validation.py +0 -37
  63. aiq/cli/commands/__init__.py +0 -0
  64. aiq/cli/commands/configure/__init__.py +0 -0
  65. aiq/cli/commands/configure/channel/__init__.py +0 -0
  66. aiq/cli/commands/configure/channel/add.py +0 -28
  67. aiq/cli/commands/configure/channel/channel.py +0 -36
  68. aiq/cli/commands/configure/channel/remove.py +0 -30
  69. aiq/cli/commands/configure/channel/update.py +0 -30
  70. aiq/cli/commands/configure/configure.py +0 -33
  71. aiq/cli/commands/evaluate.py +0 -139
  72. aiq/cli/commands/info/__init__.py +0 -14
  73. aiq/cli/commands/info/info.py +0 -39
  74. aiq/cli/commands/info/list_channels.py +0 -32
  75. aiq/cli/commands/info/list_components.py +0 -129
  76. aiq/cli/commands/info/list_mcp.py +0 -213
  77. aiq/cli/commands/registry/__init__.py +0 -14
  78. aiq/cli/commands/registry/publish.py +0 -88
  79. aiq/cli/commands/registry/pull.py +0 -118
  80. aiq/cli/commands/registry/registry.py +0 -38
  81. aiq/cli/commands/registry/remove.py +0 -108
  82. aiq/cli/commands/registry/search.py +0 -155
  83. aiq/cli/commands/sizing/__init__.py +0 -14
  84. aiq/cli/commands/sizing/calc.py +0 -297
  85. aiq/cli/commands/sizing/sizing.py +0 -27
  86. aiq/cli/commands/start.py +0 -246
  87. aiq/cli/commands/uninstall.py +0 -81
  88. aiq/cli/commands/validate.py +0 -47
  89. aiq/cli/commands/workflow/__init__.py +0 -14
  90. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  91. aiq/cli/commands/workflow/templates/config.yml.j2 +0 -16
  92. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +0 -22
  93. aiq/cli/commands/workflow/templates/register.py.j2 +0 -5
  94. aiq/cli/commands/workflow/templates/workflow.py.j2 +0 -36
  95. aiq/cli/commands/workflow/workflow.py +0 -37
  96. aiq/cli/commands/workflow/workflow_commands.py +0 -313
  97. aiq/cli/entrypoint.py +0 -135
  98. aiq/cli/main.py +0 -44
  99. aiq/cli/register_workflow.py +0 -488
  100. aiq/cli/type_registry.py +0 -1000
  101. aiq/data_models/__init__.py +0 -14
  102. aiq/data_models/api_server.py +0 -694
  103. aiq/data_models/authentication.py +0 -231
  104. aiq/data_models/common.py +0 -171
  105. aiq/data_models/component.py +0 -54
  106. aiq/data_models/component_ref.py +0 -168
  107. aiq/data_models/config.py +0 -406
  108. aiq/data_models/dataset_handler.py +0 -123
  109. aiq/data_models/discovery_metadata.py +0 -335
  110. aiq/data_models/embedder.py +0 -27
  111. aiq/data_models/evaluate.py +0 -127
  112. aiq/data_models/evaluator.py +0 -26
  113. aiq/data_models/front_end.py +0 -26
  114. aiq/data_models/function.py +0 -30
  115. aiq/data_models/function_dependencies.py +0 -72
  116. aiq/data_models/interactive.py +0 -246
  117. aiq/data_models/intermediate_step.py +0 -302
  118. aiq/data_models/invocation_node.py +0 -38
  119. aiq/data_models/its_strategy.py +0 -30
  120. aiq/data_models/llm.py +0 -27
  121. aiq/data_models/logging.py +0 -26
  122. aiq/data_models/memory.py +0 -27
  123. aiq/data_models/object_store.py +0 -44
  124. aiq/data_models/profiler.py +0 -54
  125. aiq/data_models/registry_handler.py +0 -26
  126. aiq/data_models/retriever.py +0 -30
  127. aiq/data_models/retry_mixin.py +0 -35
  128. aiq/data_models/span.py +0 -187
  129. aiq/data_models/step_adaptor.py +0 -64
  130. aiq/data_models/streaming.py +0 -33
  131. aiq/data_models/swe_bench_model.py +0 -54
  132. aiq/data_models/telemetry_exporter.py +0 -26
  133. aiq/embedder/__init__.py +0 -0
  134. aiq/embedder/langchain_client.py +0 -41
  135. aiq/embedder/nim_embedder.py +0 -59
  136. aiq/embedder/openai_embedder.py +0 -43
  137. aiq/embedder/register.py +0 -24
  138. aiq/eval/__init__.py +0 -14
  139. aiq/eval/config.py +0 -60
  140. aiq/eval/dataset_handler/__init__.py +0 -0
  141. aiq/eval/dataset_handler/dataset_downloader.py +0 -106
  142. aiq/eval/dataset_handler/dataset_filter.py +0 -52
  143. aiq/eval/dataset_handler/dataset_handler.py +0 -254
  144. aiq/eval/evaluate.py +0 -506
  145. aiq/eval/evaluator/__init__.py +0 -14
  146. aiq/eval/evaluator/base_evaluator.py +0 -73
  147. aiq/eval/evaluator/evaluator_model.py +0 -45
  148. aiq/eval/intermediate_step_adapter.py +0 -99
  149. aiq/eval/rag_evaluator/__init__.py +0 -0
  150. aiq/eval/rag_evaluator/evaluate.py +0 -178
  151. aiq/eval/rag_evaluator/register.py +0 -143
  152. aiq/eval/register.py +0 -23
  153. aiq/eval/remote_workflow.py +0 -133
  154. aiq/eval/runners/__init__.py +0 -14
  155. aiq/eval/runners/config.py +0 -39
  156. aiq/eval/runners/multi_eval_runner.py +0 -54
  157. aiq/eval/runtime_event_subscriber.py +0 -52
  158. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  159. aiq/eval/swe_bench_evaluator/evaluate.py +0 -215
  160. aiq/eval/swe_bench_evaluator/register.py +0 -36
  161. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  162. aiq/eval/trajectory_evaluator/evaluate.py +0 -75
  163. aiq/eval/trajectory_evaluator/register.py +0 -40
  164. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  165. aiq/eval/tunable_rag_evaluator/evaluate.py +0 -245
  166. aiq/eval/tunable_rag_evaluator/register.py +0 -52
  167. aiq/eval/usage_stats.py +0 -41
  168. aiq/eval/utils/__init__.py +0 -0
  169. aiq/eval/utils/output_uploader.py +0 -140
  170. aiq/eval/utils/tqdm_position_registry.py +0 -40
  171. aiq/eval/utils/weave_eval.py +0 -184
  172. aiq/experimental/__init__.py +0 -0
  173. aiq/experimental/decorators/__init__.py +0 -0
  174. aiq/experimental/decorators/experimental_warning_decorator.py +0 -130
  175. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  176. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  177. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +0 -147
  178. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +0 -204
  179. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +0 -107
  180. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  181. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +0 -105
  182. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +0 -205
  183. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +0 -146
  184. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +0 -224
  185. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  186. aiq/experimental/inference_time_scaling/models/editor_config.py +0 -132
  187. aiq/experimental/inference_time_scaling/models/its_item.py +0 -48
  188. aiq/experimental/inference_time_scaling/models/scoring_config.py +0 -112
  189. aiq/experimental/inference_time_scaling/models/search_config.py +0 -120
  190. aiq/experimental/inference_time_scaling/models/selection_config.py +0 -154
  191. aiq/experimental/inference_time_scaling/models/stage_enums.py +0 -43
  192. aiq/experimental/inference_time_scaling/models/strategy_base.py +0 -66
  193. aiq/experimental/inference_time_scaling/models/tool_use_config.py +0 -41
  194. aiq/experimental/inference_time_scaling/register.py +0 -36
  195. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  196. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +0 -168
  197. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +0 -168
  198. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +0 -111
  199. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  200. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +0 -128
  201. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +0 -122
  202. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +0 -128
  203. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  204. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +0 -63
  205. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +0 -131
  206. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +0 -159
  207. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +0 -128
  208. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +0 -58
  209. aiq/front_ends/__init__.py +0 -14
  210. aiq/front_ends/console/__init__.py +0 -14
  211. aiq/front_ends/console/authentication_flow_handler.py +0 -233
  212. aiq/front_ends/console/console_front_end_config.py +0 -32
  213. aiq/front_ends/console/console_front_end_plugin.py +0 -96
  214. aiq/front_ends/console/register.py +0 -25
  215. aiq/front_ends/cron/__init__.py +0 -14
  216. aiq/front_ends/fastapi/__init__.py +0 -14
  217. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  218. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +0 -27
  219. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +0 -107
  220. aiq/front_ends/fastapi/fastapi_front_end_config.py +0 -234
  221. aiq/front_ends/fastapi/fastapi_front_end_controller.py +0 -68
  222. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +0 -116
  223. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +0 -1092
  224. aiq/front_ends/fastapi/html_snippets/__init__.py +0 -14
  225. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +0 -35
  226. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +0 -80
  227. aiq/front_ends/fastapi/job_store.py +0 -183
  228. aiq/front_ends/fastapi/main.py +0 -72
  229. aiq/front_ends/fastapi/message_handler.py +0 -298
  230. aiq/front_ends/fastapi/message_validator.py +0 -345
  231. aiq/front_ends/fastapi/register.py +0 -25
  232. aiq/front_ends/fastapi/response_helpers.py +0 -195
  233. aiq/front_ends/fastapi/step_adaptor.py +0 -321
  234. aiq/front_ends/mcp/__init__.py +0 -14
  235. aiq/front_ends/mcp/mcp_front_end_config.py +0 -32
  236. aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
  237. aiq/front_ends/mcp/register.py +0 -27
  238. aiq/front_ends/mcp/tool_converter.py +0 -242
  239. aiq/front_ends/register.py +0 -22
  240. aiq/front_ends/simple_base/__init__.py +0 -14
  241. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +0 -54
  242. aiq/llm/__init__.py +0 -0
  243. aiq/llm/aws_bedrock_llm.py +0 -57
  244. aiq/llm/nim_llm.py +0 -46
  245. aiq/llm/openai_llm.py +0 -46
  246. aiq/llm/register.py +0 -23
  247. aiq/llm/utils/__init__.py +0 -14
  248. aiq/llm/utils/env_config_value.py +0 -94
  249. aiq/llm/utils/error.py +0 -17
  250. aiq/memory/__init__.py +0 -20
  251. aiq/memory/interfaces.py +0 -183
  252. aiq/memory/models.py +0 -112
  253. aiq/meta/module_to_distro.json +0 -3
  254. aiq/meta/pypi.md +0 -58
  255. aiq/object_store/__init__.py +0 -20
  256. aiq/object_store/in_memory_object_store.py +0 -76
  257. aiq/object_store/interfaces.py +0 -84
  258. aiq/object_store/models.py +0 -36
  259. aiq/object_store/register.py +0 -20
  260. aiq/observability/__init__.py +0 -14
  261. aiq/observability/exporter/__init__.py +0 -14
  262. aiq/observability/exporter/base_exporter.py +0 -449
  263. aiq/observability/exporter/exporter.py +0 -78
  264. aiq/observability/exporter/file_exporter.py +0 -33
  265. aiq/observability/exporter/processing_exporter.py +0 -322
  266. aiq/observability/exporter/raw_exporter.py +0 -52
  267. aiq/observability/exporter/span_exporter.py +0 -265
  268. aiq/observability/exporter_manager.py +0 -335
  269. aiq/observability/mixin/__init__.py +0 -14
  270. aiq/observability/mixin/batch_config_mixin.py +0 -26
  271. aiq/observability/mixin/collector_config_mixin.py +0 -23
  272. aiq/observability/mixin/file_mixin.py +0 -288
  273. aiq/observability/mixin/file_mode.py +0 -23
  274. aiq/observability/mixin/resource_conflict_mixin.py +0 -134
  275. aiq/observability/mixin/serialize_mixin.py +0 -61
  276. aiq/observability/mixin/type_introspection_mixin.py +0 -183
  277. aiq/observability/processor/__init__.py +0 -14
  278. aiq/observability/processor/batching_processor.py +0 -309
  279. aiq/observability/processor/callback_processor.py +0 -42
  280. aiq/observability/processor/intermediate_step_serializer.py +0 -28
  281. aiq/observability/processor/processor.py +0 -71
  282. aiq/observability/register.py +0 -96
  283. aiq/observability/utils/__init__.py +0 -14
  284. aiq/observability/utils/dict_utils.py +0 -236
  285. aiq/observability/utils/time_utils.py +0 -31
  286. aiq/plugins/.namespace +0 -1
  287. aiq/profiler/__init__.py +0 -0
  288. aiq/profiler/calc/__init__.py +0 -14
  289. aiq/profiler/calc/calc_runner.py +0 -627
  290. aiq/profiler/calc/calculations.py +0 -288
  291. aiq/profiler/calc/data_models.py +0 -188
  292. aiq/profiler/calc/plot.py +0 -345
  293. aiq/profiler/callbacks/__init__.py +0 -0
  294. aiq/profiler/callbacks/agno_callback_handler.py +0 -295
  295. aiq/profiler/callbacks/base_callback_class.py +0 -20
  296. aiq/profiler/callbacks/langchain_callback_handler.py +0 -290
  297. aiq/profiler/callbacks/llama_index_callback_handler.py +0 -205
  298. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +0 -238
  299. aiq/profiler/callbacks/token_usage_base_model.py +0 -27
  300. aiq/profiler/data_frame_row.py +0 -51
  301. aiq/profiler/data_models.py +0 -24
  302. aiq/profiler/decorators/__init__.py +0 -0
  303. aiq/profiler/decorators/framework_wrapper.py +0 -131
  304. aiq/profiler/decorators/function_tracking.py +0 -254
  305. aiq/profiler/forecasting/__init__.py +0 -0
  306. aiq/profiler/forecasting/config.py +0 -18
  307. aiq/profiler/forecasting/model_trainer.py +0 -75
  308. aiq/profiler/forecasting/models/__init__.py +0 -22
  309. aiq/profiler/forecasting/models/forecasting_base_model.py +0 -40
  310. aiq/profiler/forecasting/models/linear_model.py +0 -196
  311. aiq/profiler/forecasting/models/random_forest_regressor.py +0 -268
  312. aiq/profiler/inference_metrics_model.py +0 -28
  313. aiq/profiler/inference_optimization/__init__.py +0 -0
  314. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  315. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +0 -460
  316. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +0 -258
  317. aiq/profiler/inference_optimization/data_models.py +0 -386
  318. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  319. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +0 -468
  320. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +0 -405
  321. aiq/profiler/inference_optimization/llm_metrics.py +0 -212
  322. aiq/profiler/inference_optimization/prompt_caching.py +0 -163
  323. aiq/profiler/inference_optimization/token_uniqueness.py +0 -107
  324. aiq/profiler/inference_optimization/workflow_runtimes.py +0 -72
  325. aiq/profiler/intermediate_property_adapter.py +0 -102
  326. aiq/profiler/profile_runner.py +0 -473
  327. aiq/profiler/utils.py +0 -184
  328. aiq/registry_handlers/__init__.py +0 -0
  329. aiq/registry_handlers/local/__init__.py +0 -0
  330. aiq/registry_handlers/local/local_handler.py +0 -176
  331. aiq/registry_handlers/local/register_local.py +0 -37
  332. aiq/registry_handlers/metadata_factory.py +0 -60
  333. aiq/registry_handlers/package_utils.py +0 -567
  334. aiq/registry_handlers/pypi/__init__.py +0 -0
  335. aiq/registry_handlers/pypi/pypi_handler.py +0 -251
  336. aiq/registry_handlers/pypi/register_pypi.py +0 -40
  337. aiq/registry_handlers/register.py +0 -21
  338. aiq/registry_handlers/registry_handler_base.py +0 -157
  339. aiq/registry_handlers/rest/__init__.py +0 -0
  340. aiq/registry_handlers/rest/register_rest.py +0 -56
  341. aiq/registry_handlers/rest/rest_handler.py +0 -237
  342. aiq/registry_handlers/schemas/__init__.py +0 -0
  343. aiq/registry_handlers/schemas/headers.py +0 -42
  344. aiq/registry_handlers/schemas/package.py +0 -68
  345. aiq/registry_handlers/schemas/publish.py +0 -63
  346. aiq/registry_handlers/schemas/pull.py +0 -82
  347. aiq/registry_handlers/schemas/remove.py +0 -36
  348. aiq/registry_handlers/schemas/search.py +0 -91
  349. aiq/registry_handlers/schemas/status.py +0 -47
  350. aiq/retriever/__init__.py +0 -0
  351. aiq/retriever/interface.py +0 -37
  352. aiq/retriever/milvus/__init__.py +0 -14
  353. aiq/retriever/milvus/register.py +0 -81
  354. aiq/retriever/milvus/retriever.py +0 -228
  355. aiq/retriever/models.py +0 -74
  356. aiq/retriever/nemo_retriever/__init__.py +0 -14
  357. aiq/retriever/nemo_retriever/register.py +0 -60
  358. aiq/retriever/nemo_retriever/retriever.py +0 -190
  359. aiq/retriever/register.py +0 -22
  360. aiq/runtime/__init__.py +0 -14
  361. aiq/runtime/loader.py +0 -215
  362. aiq/runtime/runner.py +0 -190
  363. aiq/runtime/session.py +0 -158
  364. aiq/runtime/user_metadata.py +0 -130
  365. aiq/settings/__init__.py +0 -0
  366. aiq/settings/global_settings.py +0 -318
  367. aiq/test/.namespace +0 -1
  368. aiq/tool/__init__.py +0 -0
  369. aiq/tool/chat_completion.py +0 -74
  370. aiq/tool/code_execution/README.md +0 -151
  371. aiq/tool/code_execution/__init__.py +0 -0
  372. aiq/tool/code_execution/code_sandbox.py +0 -267
  373. aiq/tool/code_execution/local_sandbox/.gitignore +0 -1
  374. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -60
  375. aiq/tool/code_execution/local_sandbox/__init__.py +0 -13
  376. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -198
  377. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -6
  378. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -50
  379. aiq/tool/code_execution/register.py +0 -74
  380. aiq/tool/code_execution/test_code_execution_sandbox.py +0 -414
  381. aiq/tool/code_execution/utils.py +0 -100
  382. aiq/tool/datetime_tools.py +0 -42
  383. aiq/tool/document_search.py +0 -141
  384. aiq/tool/github_tools/__init__.py +0 -0
  385. aiq/tool/github_tools/create_github_commit.py +0 -133
  386. aiq/tool/github_tools/create_github_issue.py +0 -87
  387. aiq/tool/github_tools/create_github_pr.py +0 -106
  388. aiq/tool/github_tools/get_github_file.py +0 -106
  389. aiq/tool/github_tools/get_github_issue.py +0 -166
  390. aiq/tool/github_tools/get_github_pr.py +0 -256
  391. aiq/tool/github_tools/update_github_issue.py +0 -100
  392. aiq/tool/mcp/__init__.py +0 -14
  393. aiq/tool/mcp/exceptions.py +0 -142
  394. aiq/tool/mcp/mcp_client.py +0 -255
  395. aiq/tool/mcp/mcp_tool.py +0 -96
  396. aiq/tool/memory_tools/__init__.py +0 -0
  397. aiq/tool/memory_tools/add_memory_tool.py +0 -79
  398. aiq/tool/memory_tools/delete_memory_tool.py +0 -67
  399. aiq/tool/memory_tools/get_memory_tool.py +0 -72
  400. aiq/tool/nvidia_rag.py +0 -95
  401. aiq/tool/register.py +0 -38
  402. aiq/tool/retriever.py +0 -89
  403. aiq/tool/server_tools.py +0 -66
  404. aiq/utils/__init__.py +0 -0
  405. aiq/utils/data_models/__init__.py +0 -0
  406. aiq/utils/data_models/schema_validator.py +0 -58
  407. aiq/utils/debugging_utils.py +0 -43
  408. aiq/utils/dump_distro_mapping.py +0 -32
  409. aiq/utils/exception_handlers/__init__.py +0 -0
  410. aiq/utils/exception_handlers/automatic_retries.py +0 -289
  411. aiq/utils/exception_handlers/mcp.py +0 -211
  412. aiq/utils/exception_handlers/schemas.py +0 -114
  413. aiq/utils/io/__init__.py +0 -0
  414. aiq/utils/io/model_processing.py +0 -28
  415. aiq/utils/io/yaml_tools.py +0 -119
  416. aiq/utils/log_utils.py +0 -37
  417. aiq/utils/metadata_utils.py +0 -74
  418. aiq/utils/optional_imports.py +0 -142
  419. aiq/utils/producer_consumer_queue.py +0 -178
  420. aiq/utils/reactive/__init__.py +0 -0
  421. aiq/utils/reactive/base/__init__.py +0 -0
  422. aiq/utils/reactive/base/observable_base.py +0 -65
  423. aiq/utils/reactive/base/observer_base.py +0 -55
  424. aiq/utils/reactive/base/subject_base.py +0 -79
  425. aiq/utils/reactive/observable.py +0 -59
  426. aiq/utils/reactive/observer.py +0 -76
  427. aiq/utils/reactive/subject.py +0 -131
  428. aiq/utils/reactive/subscription.py +0 -49
  429. aiq/utils/settings/__init__.py +0 -0
  430. aiq/utils/settings/global_settings.py +0 -197
  431. aiq/utils/string_utils.py +0 -38
  432. aiq/utils/type_converter.py +0 -290
  433. aiq/utils/type_utils.py +0 -484
  434. aiq/utils/url_utils.py +0 -27
  435. aiqtoolkit-1.2.0rc4.dist-info/METADATA +0 -363
  436. aiqtoolkit-1.2.0rc4.dist-info/RECORD +0 -438
  437. aiqtoolkit-1.2.0rc4.dist-info/entry_points.txt +0 -20
  438. aiqtoolkit-1.2.0rc4.dist-info/licenses/LICENSE-3rd-party.txt +0 -3686
  439. aiqtoolkit-1.2.0rc4.dist-info/licenses/LICENSE.md +0 -201
  440. aiqtoolkit-1.2.0rc4.dist-info/top_level.txt +0 -1
  441. {aiqtoolkit-1.2.0rc4.dist-info → aiqtoolkit-1.2rc9.dist-info}/WHEEL +0 -0
aiq/profiler/calc/calc_runner.py (removed)
@@ -1,627 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import copy
- import logging
- import shutil
- import time
- import uuid
- from pathlib import Path
-
- from pydantic import ValidationError
-
- from aiq.eval.config import EvaluationRunConfig
- from aiq.eval.runners.config import MultiEvaluationRunConfig
- from aiq.eval.runners.multi_eval_runner import MultiEvaluationRunner
- from aiq.profiler.calc.calculations import LinearFitResult
- from aiq.profiler.calc.calculations import calc_gpu_estimate_based_on_slope
- from aiq.profiler.calc.calculations import calc_gpu_estimate_for_single_concurrency
- from aiq.profiler.calc.calculations import compute_slope
- from aiq.profiler.calc.data_models import CalcAlerts
- from aiq.profiler.calc.data_models import CalcData
- from aiq.profiler.calc.data_models import CalcRunnerConfig
- from aiq.profiler.calc.data_models import CalcRunnerOutput
- from aiq.profiler.calc.data_models import FitConfig
- from aiq.profiler.calc.data_models import FitResults
- from aiq.profiler.calc.data_models import GPUEstimates
- from aiq.profiler.calc.data_models import SizingMetricPerItem
- from aiq.profiler.calc.data_models import SizingMetrics
- from aiq.profiler.calc.data_models import SizingMetricsAlerts
-
- logger = logging.getLogger(__name__)
-
-
- class LinearFitAnalyzer:
-     """Handles linear regression analysis for concurrency vs time metrics."""
-
-     def __init__(self, fit_config: FitConfig):
-         self.fit_config = fit_config
-         self.llm_latency_fit: LinearFitResult | None = None
-         self.wf_runtime_fit: LinearFitResult | None = None
-
-     def analyze_metrics(self, sizing_metrics_per_concurrency: dict[int, SizingMetrics]) -> dict[int, CalcAlerts]:
-         """
-         Analyze metrics and return alerts including outlier information.
-
-         Returns:
-             dict[int, CalcAlerts]: Alerts per concurrency including outlier flags
-         """
-         alerts_per_concurrency = {}
-
-         # Need at least 2 points for linear regression
-         if len(sizing_metrics_per_concurrency) < 2:
-             logger.warning("Need at least 2 concurrencies for linear analysis")
-             # Return empty alerts for all concurrencies
-             for concurrency in sizing_metrics_per_concurrency.keys():
-                 alerts_per_concurrency[concurrency] = CalcAlerts()
-             return alerts_per_concurrency
-
-         # Calculate linear fits
-         concurrencies = list(sizing_metrics_per_concurrency.keys())
-         latencies = [run.llm_latency_p95 for run in sizing_metrics_per_concurrency.values()]
-         try:
-             self.llm_latency_fit = compute_slope(concurrencies, latencies, self.fit_config)
-             logger.info("Computed latency fit: slope=%.4f, R²=%.3f",
-                         self.llm_latency_fit.slope,
-                         self.llm_latency_fit.r_squared)
-         except ValueError as e:
-             logger.warning("Failed to compute latency fit: %s", e)
-             self.llm_latency_fit = None
-
-         runtimes = [run.workflow_runtime_p95 for run in sizing_metrics_per_concurrency.values()]
-         try:
-             self.wf_runtime_fit = compute_slope(concurrencies, runtimes, self.fit_config)
-             logger.info("Computed runtime fit: slope=%.4f, R²=%.3f",
-                         self.wf_runtime_fit.slope,
-                         self.wf_runtime_fit.r_squared)
-         except ValueError as e:
-             logger.warning("Failed to compute runtime fit: %s", e)
-             self.wf_runtime_fit = None
-
-         # Add outlier information to alerts
-         for concurrency in sizing_metrics_per_concurrency.keys():
-             alerts = CalcAlerts()
-
-             # Check for latency outliers
-             if self.llm_latency_fit and concurrency in self.llm_latency_fit.outliers_removed:
-                 alerts.outlier_llm_latency = True
-
-             # Check for runtime outliers
-             if self.wf_runtime_fit and concurrency in self.wf_runtime_fit.outliers_removed:
-                 alerts.outlier_workflow_runtime = True
-
-             alerts_per_concurrency[concurrency] = alerts
-
-         return alerts_per_concurrency
-
-
- class CalcRunner:
-     """
-     Calculator for GPU sizing based on concurrency vs. time metrics.
-     """
-
-     def __init__(self, config: CalcRunnerConfig):
-         """
-         Initialize CalcRunner with a config file and a list of concurrencies.
-         """
-         self.config = config
-
-         # Sizing metrics per concurrency, collected from the evaluation runs
-         # This is used as input to calculate the GPU estimates and alerts
-         self.metrics_per_concurrency: dict[int, SizingMetrics] = {}
-
-         self.valid_concurrencies: list = []
-
-         # GPU estimates and alerts
-         self.gpu_estimates_per_concurrency: dict[int, GPUEstimates] = {}
-         self.alerts_per_concurrency: dict[int, CalcAlerts] = {}
-
-         # Linear fit analyzer for outlier detection and trend analysis
-         self.linear_analyzer = LinearFitAnalyzer(self.config.fit_config)
-
-         # Validate configuration
-         self.validate_config()
-
-     def validate_config(self) -> None:
-         """
-         Validate the configuration parameters.
-         Raises ValueError if configuration is invalid.
-         """
-         # atleast two concurrencies are needed to estimate the GPU count
-         if len(self.config.concurrencies) < 2:
-             raise ValueError("Atleast two concurrencies are needed to estimate the GPU count.")
-
-         # if the same value is repeated in the concurrencies list, raise an error
-         if len(self.config.concurrencies) != len(set(self.config.concurrencies)):
-             raise ValueError("Concurrencies list contains duplicate values.")
-
-         # The value of the concurrencies has to be greater than 0
-         if any(concurrency <= 0 for concurrency in self.config.concurrencies):
-             raise ValueError("Concurrencies list contains values less than or equal to 0.")
-
-         if self.config.offline_mode:
-             # In offline mode target test parameters are needed to estimate the GPU count
-             if self.target_llm_latency <= 0 and self.target_wf_runtime <= 0:
-                 raise ValueError("Both target_llm_latency and target_workflow_runtime are 0. "
-                                  "Cannot estimate the GPU count in offline mode.")
-             if self.test_gpu_count <= 0:
-                 raise ValueError("Test GPU count is 0. Cannot estimate the GPU count in offline mode.")
-             if self.target_users <= 0:
-                 raise ValueError("Target users is 0. Cannot estimate the GPU count in offline mode.")
-             if self.append_job:
-                 raise ValueError("Appending jobs is not supported in offline mode.")
-             if not self.config.output_dir:
-                 raise ValueError("Output directory is required in offline mode.")
-         else:
-             # Online mode validation
-             if not self.config.config_file:
-                 raise ValueError("Config file is required in online mode.")
-             if self.target_llm_latency <= 0 and self.target_wf_runtime <= 0:
-                 logger.warning("Both target_llm_latency and target_workflow_runtime are 0. "
-                                "No SLA will be enforced.")
-             if self.test_gpu_count <= 0:
-                 logger.warning("Test GPU count is 0. Tests will be run but the GPU count will not be estimated.")
-             if self.target_users <= 0:
-                 logger.warning("Target users is 0. Tests will be run but the GPU count will not be estimated.")
-
-     @property
-     def target_llm_latency(self) -> float:
-         return self.config.target_llm_latency_p95
-
-     @property
-     def target_wf_runtime(self) -> float:
-         return self.config.target_workflow_runtime_p95
-
-     @property
-     def target_users(self) -> int:
-         return self.config.target_users
-
-     @property
-     def test_gpu_count(self) -> int:
-         return self.config.test_gpu_count
-
-     @property
-     def append_job(self) -> bool:
-         return self.config.append_job
-
-     @property
-     def output_dir(self) -> Path:
-         return self.config.output_dir
-
-     def _calc_gpu_estimates_based_on_slope(self,
-                                            sizing_metrics_per_concurrency: dict[int, SizingMetrics],
-                                            use_latency: bool,
-                                            use_runtime: bool) -> GPUEstimates:
-         """
-         Calculate GPU estimates based on the linear fit results
-         """
-         gpu_estimate_by_wf_runtime = None
-         gpu_estimate_by_llm_latency = None
-
-         if use_runtime and self.linear_analyzer.wf_runtime_fit:
-             fit = self.linear_analyzer.wf_runtime_fit
-             gpu_estimate_by_wf_runtime = calc_gpu_estimate_based_on_slope(target_time_metric=self.target_wf_runtime,
-                                                                           target_users=self.target_users,
-                                                                           test_gpu_count=self.test_gpu_count,
-                                                                           observed_slope=fit.slope,
-                                                                           observed_intercept=fit.intercept)
-             logger.info(
-                 "[GPU Estimation %s] Runtime slope=%.4f, intercept=%.4f, R²=%.3f, outliers_removed=%s, estimate=%.2f",
-                 "offline" if self.config.offline_mode else "online",
-                 fit.slope,
-                 fit.intercept,
-                 fit.r_squared,
-                 fit.outliers_removed,
-                 gpu_estimate_by_wf_runtime)
-
-         if use_latency and self.linear_analyzer.llm_latency_fit:
-             fit = self.linear_analyzer.llm_latency_fit
-             gpu_estimate_by_llm_latency = calc_gpu_estimate_based_on_slope(target_time_metric=self.target_llm_latency,
-                                                                            target_users=self.target_users,
-                                                                            test_gpu_count=self.test_gpu_count,
-                                                                            observed_slope=fit.slope,
-                                                                            observed_intercept=fit.intercept)
-             logger.info(
-                 "[GPU Estimation %s] Latency slope=%.4f, intercept=%.4f, R²=%.3f, outliers_removed=%s, estimate=%.2f",
-                 "offline" if self.config.offline_mode else "online",
-                 fit.slope,
-                 fit.intercept,
-                 fit.r_squared,
-                 fit.outliers_removed,
-                 gpu_estimate_by_llm_latency)
-
-         return GPUEstimates(gpu_estimate_by_wf_runtime=gpu_estimate_by_wf_runtime,
-                             gpu_estimate_by_llm_latency=gpu_estimate_by_llm_latency)
-
-     def _calc_gpu_estimates_per_concurrency(self, sizing_metrics_per_concurrency: dict[int, SizingMetrics]):
-         """Calculate per-concurrency GPU estimates and existing alerts."""
-         use_latency = self.target_llm_latency > 0
-         use_runtime = self.target_wf_runtime > 0
-
-         logger.info("Calculating per-concurrency metrics for %d concurrencies", len(sizing_metrics_per_concurrency))
-         logger.info("Target users: %d, Test GPU count: %d", self.target_users, self.test_gpu_count)
-         logger.info("Using targets - Latency: %s, Runtime: %s",
-                     "Yes" if use_latency else "No",
-                     "Yes" if use_runtime else "No")
-
-         for concurrency, metrics_per_concurrency in sizing_metrics_per_concurrency.items():
-             observed_latency = metrics_per_concurrency.llm_latency_p95
-             observed_runtime = metrics_per_concurrency.workflow_runtime_p95
-
-             # Get ROUGH GPU estimates per concurrency. This is not used for the final GPU estimation.
-             # It is only available for information purposes.
-             gpu_estimates = calc_gpu_estimate_for_single_concurrency(target_llm_latency=self.target_llm_latency,
-                                                                      target_workflow_runtime=self.target_wf_runtime,
-                                                                      target_users=self.target_users,
-                                                                      test_concurrency=concurrency,
-                                                                      test_gpu_count=self.test_gpu_count,
-                                                                      observed_latency=observed_latency,
-                                                                      observed_runtime=observed_runtime)
-
-             # Store the GPU estimates directly (no need to reconstruct the same object)
-             self.gpu_estimates_per_concurrency[concurrency] = gpu_estimates
-
-             # Calculate out-of-range items based on per-item metrics (only if targets are specified)
-             num_items_greater_than_target_latency = 0
-             num_items_greater_than_target_runtime = 0
-
-             if (use_latency or use_runtime) and metrics_per_concurrency.per_item_metrics:
-                 for item_metrics in metrics_per_concurrency.per_item_metrics.values():
-                     if use_latency and item_metrics.llm_latency > self.target_llm_latency:
-                         num_items_greater_than_target_latency += 1
-                     if use_runtime and item_metrics.workflow_runtime > self.target_wf_runtime:
-                         num_items_greater_than_target_runtime += 1
-             else:
-                 logger.debug("Skipping per-item processing for concurrency %d (no targets or no per-item data)",
-                              concurrency)
-
-             # Update existing alerts with the out-of-range data
-             existing_alerts = self.alerts_per_concurrency.get(concurrency, CalcAlerts())
-             existing_alerts.num_items_greater_than_target_latency = num_items_greater_than_target_latency
-             existing_alerts.num_items_greater_than_target_runtime = num_items_greater_than_target_runtime
-             self.alerts_per_concurrency[concurrency] = existing_alerts
-
-             logger.debug("Concurrency %d: GPU estimate=%.2f, out-of-range items=%d",
-                          concurrency,
-                          gpu_estimates.gpu_estimate_by_wf_runtime,
-                          num_items_greater_than_target_latency + num_items_greater_than_target_runtime)
-
-         logger.info("Completed per-concurrency calculations:")
-         logger.info(" - GPU estimates calculated for %d concurrencies", len(self.gpu_estimates_per_concurrency))
-
-     def _validate_gpu_estimation_parameters(self, use_latency: bool, use_runtime: bool) -> bool:
-         """Validate parameters required for GPU estimation."""
-         if self.target_users <= 0:
-             logger.warning("Target users must be greater than 0 for GPU estimation")
-             return False
-
-         if self.test_gpu_count <= 0:
-             logger.warning("Test GPU count must be greater than 0 for GPU estimation")
-             return False
-
-         if not use_latency and not use_runtime:
-             logger.warning("No targets time metrics specified")
-             return False
-
-         return True
-
-     def _validate_metrics_data(self, sizing_metrics_per_concurrency: dict) -> dict:
-         """Validate and filter metrics data."""
-         valid_metrics = {}
-         for concurrency, metrics in sizing_metrics_per_concurrency.items():
-             if not metrics or not metrics.llm_latency_p95 or not metrics.workflow_runtime_p95:
-                 logger.warning("Invalid metrics for concurrency %d: missing required fields", concurrency)
-                 continue
-             valid_metrics[concurrency] = metrics
-         return valid_metrics
-
-     def _calc_fit_and_gpu_estimate(self, sizing_metrics_per_concurrency: dict[int, SizingMetrics]) -> GPUEstimates:
-         """
-         Estimate GPU count to meet target latency and/or workflow runtime SLA
-         for a given target user load.
-
-         Returns:
-             - GPU estimates based on the slope of the time vs concurrency
-             - GPU estimates per concurrency (rough estimates)
-             - Alerts per concurrency (outliers, etc.)
-         """
-         gpu_estimates = GPUEstimates()
-         # Filter out concurrencies that are missing required metrics
-         valid_metrics = self._validate_metrics_data(sizing_metrics_per_concurrency)
-         if not valid_metrics:
-             logger.warning("No valid metrics found for metrics calculation")
-             return gpu_estimates
-
-         # Filter out concurrencies that were interrupted
-         valid_runs = {
-             concurrency: metrics
-             for concurrency, metrics in valid_metrics.items() if not metrics.alerts.workflow_interrupted
-         }
-         if not valid_runs:
-             logger.warning("No valid runs found for slope-based estimation")
-             return gpu_estimates
-
-         self.valid_concurrencies = valid_runs.keys()
-
-         # Perform linear analysis on valid runs, this is done even if GPU estimation is skipped
-         self.alerts_per_concurrency = self.linear_analyzer.analyze_metrics(valid_runs)
-
-         # Validate GPU estimation parameters
-         use_latency = self.target_llm_latency > 0
-         use_runtime = self.target_wf_runtime > 0
-         if not self._validate_gpu_estimation_parameters(use_latency, use_runtime):
-             return gpu_estimates
-
-         logger.info("Starting GPU estimation with %d concurrencies", len(valid_metrics))
-         logger.info("Target users: %d, Test GPU count: %d", self.target_users, self.test_gpu_count)
-         logger.info("Target latency: %.3fs, Target runtime: %.3fs",
-                     self.target_llm_latency if self.target_llm_latency > 0 else 0,
-                     self.target_wf_runtime if self.target_wf_runtime > 0 else 0)
-
-         # Calculate GPU estimates per-concurrency
-         self._calc_gpu_estimates_per_concurrency(valid_runs)
-
-         # Calculate overall gpu estimates using linear fits
-         gpu_estimates = self._calc_gpu_estimates_based_on_slope(valid_runs, use_latency, use_runtime)
-
-         return gpu_estimates
-
-     def generate_calc_runner_output(self) -> CalcRunnerOutput:
-         """
-         Build CalcRunnerOutput from sizing metrics per concurrency.
-         """
-         if not self.metrics_per_concurrency:
-             logger.warning("No metrics per concurrency found. Skipping generation of CalcRunnerOutput.")
-             return CalcRunnerOutput()
-
-         logger.info("Building CalcRunnerOutput from %d concurrency metrics", len(self.metrics_per_concurrency))
-
-         # Calculate gpu estimates and per-concurrency metrics
-         gpu_estimates = self._calc_fit_and_gpu_estimate(self.metrics_per_concurrency)
-
-         # Group per-concurrency data (inputs to the calculator and outputs from the calculator)
-         calc_data = {}
-         for concurrency in self.metrics_per_concurrency.keys():
-             # Inputs to the calculator
-             tmp_sizing_metrics = self.metrics_per_concurrency[concurrency]
-             # Outputs from the calculator
-             tmp_gpu_estimates = self.gpu_estimates_per_concurrency.get(concurrency, GPUEstimates())
-             tmp_alerts = self.alerts_per_concurrency.get(concurrency, CalcAlerts())
-
-             calc_data[concurrency] = CalcData(gpu_estimates=tmp_gpu_estimates,
-                                               alerts=tmp_alerts,
-                                               sizing_metrics=tmp_sizing_metrics)
-
-         if gpu_estimates.gpu_estimate_by_wf_runtime is not None:
-             logger.info("GPU estimate by workflow runtime: %.2f", gpu_estimates.gpu_estimate_by_wf_runtime)
-         if gpu_estimates.gpu_estimate_by_llm_latency is not None:
-             logger.info("GPU estimate by LLM latency: %.2f", gpu_estimates.gpu_estimate_by_llm_latency)
-
-         return CalcRunnerOutput(gpu_estimates=gpu_estimates,
-                                 calc_data=calc_data,
-                                 fit_results=FitResults(llm_latency_fit=self.linear_analyzer.llm_latency_fit,
-                                                        wf_runtime_fit=self.linear_analyzer.wf_runtime_fit))
-
-     def plot_concurrency_vs_time_metrics(self, output_dir: Path):
-         """Plots concurrency vs. time metrics using pre-computed fits."""
-         from aiq.profiler.calc.plot import plot_concurrency_vs_time_metrics as plot_metrics
-
-         # Only plot if we have valid metrics and at least one fit
-         if not self.metrics_per_concurrency:
-             logger.warning("No metrics available for plotting")
-             return
-
-         # Filter to only valid runs for plotting
-         valid_runs = {
-             concurrency: metrics
-             for concurrency, metrics in self.metrics_per_concurrency.items() if concurrency in self.valid_concurrencies
-         }
-
-         if not valid_runs:
-             logger.warning("No valid runs available for plotting")
-             return
-         try:
-             plot_metrics(
-                 metrics_per_concurrency=valid_runs,  # Only valid runs
-                 output_dir=output_dir,
-                 target_llm_latency=self.target_llm_latency,
-                 target_runtime=self.target_wf_runtime,
-                 llm_latency_fit=self.linear_analyzer.llm_latency_fit,  # May be None
-                 runtime_fit=self.linear_analyzer.wf_runtime_fit  # May be None
-             )
-         except Exception as e:
-             logger.exception("Failed to plot concurrency vs. time metrics: %s", e, exc_info=True)
-             logger.warning("Skipping plot of concurrency vs. time metrics")
-
-     def write_output(self, output_dir: Path, calc_runner_output: CalcRunnerOutput):
-         """
-         Write the output to the output directory.
-         """
-         if not output_dir:
-             logger.warning("Output directory is not set. Skipping write.")
-             return
-
-         mode = "offline" if self.config.offline_mode else "online"
-         subdir = output_dir / mode
-
-         if self.append_job:
-             job_dir = subdir / f"job_{uuid.uuid4()}"
-         else:
-             # Clear all previous jobs when not in append mode
-             existing_jobs = list(subdir.glob("job_*"))
-             if existing_jobs:
-                 logger.info(f"Clearing {len(existing_jobs)} existing jobs")
-                 for job in existing_jobs:
-                     if job.is_dir():
-                         shutil.rmtree(job)
-             # Use timestamp-based naming
-             job_dir = subdir / f"job_{int(time.time())}"
-
-         job_dir.mkdir(parents=True, exist_ok=True)
-
-         if self.config.plot_data:
-             self.plot_concurrency_vs_time_metrics(job_dir)
-
-         output_path = job_dir / "calc_runner_output.json"
-         output_path.write_text(calc_runner_output.model_dump_json(indent=2))
-         logger.info("Wrote output to %s", job_dir)
-
-     def run_offline(self) -> CalcRunnerOutput:
-         """
-         Run in offline mode.
-         1. Read previous jobs in online mode and create sizing metrics per concurrency
-         2. Calculate GPU estimates
-         3. Write the output to the offline subdirectory
-         """
-         # Read all jobs in online mode and only append unique concurrency values to metrics_per_concurrency
-         online_dir = Path(self.config.output_dir) / "online"
-         if not online_dir.exists():
-             logger.warning("Online directory %s does not exist. Skipping offline mode.", online_dir)
-             return CalcRunnerOutput()
-
-         # Get all job directories and sort by creation time (most recent first)
-         job_dirs = [job_dir for job_dir in online_dir.iterdir() if job_dir.is_dir() and job_dir.name.startswith("job_")]
-         job_dirs.sort(key=lambda x: x.stat().st_mtime, reverse=True)
-
-         logger.info("Found %d job directories, processing from most recent to oldest", len(job_dirs))
-
-         for job_dir in job_dirs:
-             calc_runner_output_path = job_dir / "calc_runner_output.json"
-             if not calc_runner_output_path.exists():
-                 logger.warning("Calc runner output file %s does not exist. Skipping job %s.",
-                                calc_runner_output_path,
-                                job_dir.name)
-                 continue
-             try:
-                 calc_output = CalcRunnerOutput.model_validate_json(calc_runner_output_path.read_text())
-             except ValidationError as e:
-                 logger.exception("Failed to validate calc runner output file %s. Skipping job %s.",
-                                  calc_runner_output_path,
-                                  e,
-                                  exc_info=True)
-                 continue
-
-             # Extract sizing metrics from calc_data
-             for concurrency, data in calc_output.calc_data.items():
-                 metrics = data.sizing_metrics
-                 if concurrency not in self.metrics_per_concurrency:
-                     logger.info("Adding concurrency %s from job %s (most recent available).", concurrency, job_dir.name)
-                     logger.info("Sizing metrics: %s", metrics)
-                     self.metrics_per_concurrency[concurrency] = metrics
-                 else:
-                     # Skip since we already have this concurrency from a more recent job
-                     logger.debug("Concurrency %s already exists from a more recent job. Skipping job %s.",
-                                  concurrency,
-                                  job_dir.name)
-
-         # calculate gpu estimates
-         calc_runner_output = self.generate_calc_runner_output()
-
-         # write the offline output
-         self.write_output(self.config.output_dir, calc_runner_output)
-
-         return calc_runner_output
-
-     async def run_online(self) -> CalcRunnerOutput:
-         """
-         Create a MultiEvaluationRunner with concurrency overrides.
-         Run in online mode.
-         1. Run the workflow
-         2. Create sizing metrics per concurrency from the profiler results and usage stats
-         3. Calculate GPU estimates
-         4. Write the output to the online subdirectory
-         """
-         # Override the concurrency and alias keys in the config
-         concurrency_key = "eval.general.max_concurrency"
-         alias_key = "eval.general.workflow_alias"
-         # Ensure profiler base metrics are enabled via overrides
-         profiler_base_metrics_key = "eval.general.profiler.base_metrics"
-
-         # setup the base config
-         eval_run_config = EvaluationRunConfig(config_file=self.config.config_file,
-                                               adjust_dataset_size=True,
-                                               num_passes=self.config.num_passes,
-                                               endpoint=self.config.endpoint,
-                                               endpoint_timeout=self.config.endpoint_timeout)
-
-         # Create a copy of the base config and apply the overrides for each concurrency
-         configs = {}
-         for concurrency in self.config.concurrencies:
-             config = copy.deepcopy(eval_run_config)
-             override = ((concurrency_key, str(concurrency)), (alias_key, "wf_concurrency_" + str(concurrency)),
-                         (profiler_base_metrics_key, "true"))
-             config.override = override
-             configs[concurrency] = config
-
-         # Instantiate the multi-evaluation run config with the overrides for each concurrency
-         config = MultiEvaluationRunConfig(configs=configs)
-
-         # Instantiate and run multi-evaluation runner
-         runner = MultiEvaluationRunner(config)
-         evaluation_run_outputs = await runner.run_all()
-         if not evaluation_run_outputs:
-             logger.warning("No evaluation run outputs found. Skipping online mode.")
-             return CalcRunnerOutput()
-
-         # Calculate sizing metrics per concurrency
-         # if the workflow was interrupted, the metrics are not eligible for slope-based GPU estimation
-         for concurrency, eval_output in evaluation_run_outputs.items():
-             profiler_results = eval_output.profiler_results
-             usage_stats = eval_output.usage_stats
-             workflow_interrupted = eval_output.workflow_interrupted
-
-             per_item_metrics = {
-                 item_id:
-                 SizingMetricPerItem(llm_latency=item_metrics.llm_latency, workflow_runtime=item_metrics.runtime)
-                 for item_id, item_metrics in eval_output.usage_stats.usage_stats_items.items()
-             }
-
-             # if the workflow was interrupted, the metrics are not eligible for slope-based GPU estimation
-             llm_latency_p95 = profiler_results.llm_latency_ci.p95 \
-                 if profiler_results.llm_latency_ci else 0
-             workflow_runtime_p95 = profiler_results.workflow_runtime_metrics.p95 \
-                 if profiler_results.workflow_runtime_metrics else 0
-             self.metrics_per_concurrency[concurrency] = SizingMetrics(
-                 llm_latency_p95=llm_latency_p95,
-                 workflow_runtime_p95=workflow_runtime_p95,
-                 total_runtime=usage_stats.total_runtime,
-                 per_item_metrics=per_item_metrics,
-                 alerts=SizingMetricsAlerts(workflow_interrupted=workflow_interrupted))
-
-         # calculate gpu estimates
-         calc_runner_output = self.generate_calc_runner_output()
-
-         # plot the metrics and write the output
-         self.write_output(self.config.output_dir, calc_runner_output)
-
-         return calc_runner_output
-
-     async def run(self) -> CalcRunnerOutput:
-         """
-         online mode:
-         1. Run the workflow
-         2. Collect profiler results and usage stats
-         3. Calculate GPU estimates
-         4. Write the output to the online subdirectory
-
-         offline mode:
-         1. Read previous jobs in online mode and only append unique concurrency values to metrics_per_concurrency
-         2. Calculate GPU estimates
-         3. Write the output to the offline subdirectory
-         """
-         if self.config.offline_mode:
-             return self.run_offline()
-         else:
-             return await self.run_online()
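
For readers skimming the removed calc_runner.py above: its sizing logic fits observed p95 LLM latency and p95 workflow runtime against test concurrency (compute_slope), then extrapolates the GPU count needed to keep a target user load within an SLA (calc_gpu_estimate_based_on_slope). The exact formulas live in aiq/profiler/calc/calculations.py, which this diff does not include, so the sketch below is only a minimal, hypothetical illustration of slope-based extrapolation under an assumed linear scaling model; the names estimate_gpus and LinearFit are illustrative, not part of the toolkit's API.

# Hypothetical sketch only - not the removed calc_gpu_estimate_based_on_slope().
# Assumes p95 time grows linearly with concurrency on the test GPU count and
# that sustainable concurrency scales linearly with the number of GPUs.
from dataclasses import dataclass


@dataclass
class LinearFit:
    slope: float      # added p95 seconds per unit of concurrency
    intercept: float  # baseline p95 seconds at zero concurrency


def estimate_gpus(target_time: float, target_users: int, test_gpu_count: int, fit: LinearFit) -> float:
    """Estimate how many GPUs keep target_users under target_time."""
    if fit.slope <= 0:
        raise ValueError("Fit slope must be positive (time should grow with concurrency).")
    # Concurrency the test setup can sustain while still meeting the target time
    max_concurrency = (target_time - fit.intercept) / fit.slope
    if max_concurrency <= 0:
        raise ValueError("Target time is below the fitted intercept; no concurrency meets it.")
    concurrency_per_gpu = max_concurrency / test_gpu_count
    return target_users / concurrency_per_gpu


# Example: runtime fit of 0.8 s per concurrent request plus a 2.0 s baseline on 4 test GPUs,
# with a 10 s SLA at 500 users -> (10 - 2) / 0.8 = 10 concurrent on 4 GPUs = 2.5 per GPU -> 200 GPUs.
print(estimate_gpus(target_time=10.0, target_users=500, test_gpu_count=4,
                    fit=LinearFit(slope=0.8, intercept=2.0)))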