aiqtoolkit 1.2.0rc4__py3-none-any.whl → 1.2.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (441) hide show
  1. aiqtoolkit-1.2.0rc5.dist-info/METADATA +29 -0
  2. aiqtoolkit-1.2.0rc5.dist-info/RECORD +4 -0
  3. aiqtoolkit-1.2.0rc5.dist-info/top_level.txt +1 -0
  4. aiq/agent/__init__.py +0 -0
  5. aiq/agent/base.py +0 -239
  6. aiq/agent/dual_node.py +0 -67
  7. aiq/agent/react_agent/__init__.py +0 -0
  8. aiq/agent/react_agent/agent.py +0 -355
  9. aiq/agent/react_agent/output_parser.py +0 -104
  10. aiq/agent/react_agent/prompt.py +0 -41
  11. aiq/agent/react_agent/register.py +0 -149
  12. aiq/agent/reasoning_agent/__init__.py +0 -0
  13. aiq/agent/reasoning_agent/reasoning_agent.py +0 -225
  14. aiq/agent/register.py +0 -23
  15. aiq/agent/rewoo_agent/__init__.py +0 -0
  16. aiq/agent/rewoo_agent/agent.py +0 -411
  17. aiq/agent/rewoo_agent/prompt.py +0 -108
  18. aiq/agent/rewoo_agent/register.py +0 -158
  19. aiq/agent/tool_calling_agent/__init__.py +0 -0
  20. aiq/agent/tool_calling_agent/agent.py +0 -119
  21. aiq/agent/tool_calling_agent/register.py +0 -106
  22. aiq/authentication/__init__.py +0 -14
  23. aiq/authentication/api_key/__init__.py +0 -14
  24. aiq/authentication/api_key/api_key_auth_provider.py +0 -96
  25. aiq/authentication/api_key/api_key_auth_provider_config.py +0 -124
  26. aiq/authentication/api_key/register.py +0 -26
  27. aiq/authentication/exceptions/__init__.py +0 -14
  28. aiq/authentication/exceptions/api_key_exceptions.py +0 -38
  29. aiq/authentication/exceptions/auth_code_grant_exceptions.py +0 -86
  30. aiq/authentication/exceptions/call_back_exceptions.py +0 -38
  31. aiq/authentication/exceptions/request_exceptions.py +0 -54
  32. aiq/authentication/http_basic_auth/__init__.py +0 -0
  33. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +0 -81
  34. aiq/authentication/http_basic_auth/register.py +0 -30
  35. aiq/authentication/interfaces.py +0 -93
  36. aiq/authentication/oauth2/__init__.py +0 -14
  37. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +0 -107
  38. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +0 -39
  39. aiq/authentication/oauth2/register.py +0 -25
  40. aiq/authentication/register.py +0 -21
  41. aiq/builder/__init__.py +0 -0
  42. aiq/builder/builder.py +0 -285
  43. aiq/builder/component_utils.py +0 -316
  44. aiq/builder/context.py +0 -264
  45. aiq/builder/embedder.py +0 -24
  46. aiq/builder/eval_builder.py +0 -161
  47. aiq/builder/evaluator.py +0 -29
  48. aiq/builder/framework_enum.py +0 -24
  49. aiq/builder/front_end.py +0 -73
  50. aiq/builder/function.py +0 -344
  51. aiq/builder/function_base.py +0 -380
  52. aiq/builder/function_info.py +0 -627
  53. aiq/builder/intermediate_step_manager.py +0 -174
  54. aiq/builder/llm.py +0 -25
  55. aiq/builder/retriever.py +0 -25
  56. aiq/builder/user_interaction_manager.py +0 -74
  57. aiq/builder/workflow.py +0 -148
  58. aiq/builder/workflow_builder.py +0 -1117
  59. aiq/cli/__init__.py +0 -14
  60. aiq/cli/cli_utils/__init__.py +0 -0
  61. aiq/cli/cli_utils/config_override.py +0 -231
  62. aiq/cli/cli_utils/validation.py +0 -37
  63. aiq/cli/commands/__init__.py +0 -0
  64. aiq/cli/commands/configure/__init__.py +0 -0
  65. aiq/cli/commands/configure/channel/__init__.py +0 -0
  66. aiq/cli/commands/configure/channel/add.py +0 -28
  67. aiq/cli/commands/configure/channel/channel.py +0 -36
  68. aiq/cli/commands/configure/channel/remove.py +0 -30
  69. aiq/cli/commands/configure/channel/update.py +0 -30
  70. aiq/cli/commands/configure/configure.py +0 -33
  71. aiq/cli/commands/evaluate.py +0 -139
  72. aiq/cli/commands/info/__init__.py +0 -14
  73. aiq/cli/commands/info/info.py +0 -39
  74. aiq/cli/commands/info/list_channels.py +0 -32
  75. aiq/cli/commands/info/list_components.py +0 -129
  76. aiq/cli/commands/info/list_mcp.py +0 -213
  77. aiq/cli/commands/registry/__init__.py +0 -14
  78. aiq/cli/commands/registry/publish.py +0 -88
  79. aiq/cli/commands/registry/pull.py +0 -118
  80. aiq/cli/commands/registry/registry.py +0 -38
  81. aiq/cli/commands/registry/remove.py +0 -108
  82. aiq/cli/commands/registry/search.py +0 -155
  83. aiq/cli/commands/sizing/__init__.py +0 -14
  84. aiq/cli/commands/sizing/calc.py +0 -297
  85. aiq/cli/commands/sizing/sizing.py +0 -27
  86. aiq/cli/commands/start.py +0 -246
  87. aiq/cli/commands/uninstall.py +0 -81
  88. aiq/cli/commands/validate.py +0 -47
  89. aiq/cli/commands/workflow/__init__.py +0 -14
  90. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  91. aiq/cli/commands/workflow/templates/config.yml.j2 +0 -16
  92. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +0 -22
  93. aiq/cli/commands/workflow/templates/register.py.j2 +0 -5
  94. aiq/cli/commands/workflow/templates/workflow.py.j2 +0 -36
  95. aiq/cli/commands/workflow/workflow.py +0 -37
  96. aiq/cli/commands/workflow/workflow_commands.py +0 -313
  97. aiq/cli/entrypoint.py +0 -135
  98. aiq/cli/main.py +0 -44
  99. aiq/cli/register_workflow.py +0 -488
  100. aiq/cli/type_registry.py +0 -1000
  101. aiq/data_models/__init__.py +0 -14
  102. aiq/data_models/api_server.py +0 -694
  103. aiq/data_models/authentication.py +0 -231
  104. aiq/data_models/common.py +0 -171
  105. aiq/data_models/component.py +0 -54
  106. aiq/data_models/component_ref.py +0 -168
  107. aiq/data_models/config.py +0 -406
  108. aiq/data_models/dataset_handler.py +0 -123
  109. aiq/data_models/discovery_metadata.py +0 -335
  110. aiq/data_models/embedder.py +0 -27
  111. aiq/data_models/evaluate.py +0 -127
  112. aiq/data_models/evaluator.py +0 -26
  113. aiq/data_models/front_end.py +0 -26
  114. aiq/data_models/function.py +0 -30
  115. aiq/data_models/function_dependencies.py +0 -72
  116. aiq/data_models/interactive.py +0 -246
  117. aiq/data_models/intermediate_step.py +0 -302
  118. aiq/data_models/invocation_node.py +0 -38
  119. aiq/data_models/its_strategy.py +0 -30
  120. aiq/data_models/llm.py +0 -27
  121. aiq/data_models/logging.py +0 -26
  122. aiq/data_models/memory.py +0 -27
  123. aiq/data_models/object_store.py +0 -44
  124. aiq/data_models/profiler.py +0 -54
  125. aiq/data_models/registry_handler.py +0 -26
  126. aiq/data_models/retriever.py +0 -30
  127. aiq/data_models/retry_mixin.py +0 -35
  128. aiq/data_models/span.py +0 -187
  129. aiq/data_models/step_adaptor.py +0 -64
  130. aiq/data_models/streaming.py +0 -33
  131. aiq/data_models/swe_bench_model.py +0 -54
  132. aiq/data_models/telemetry_exporter.py +0 -26
  133. aiq/embedder/__init__.py +0 -0
  134. aiq/embedder/langchain_client.py +0 -41
  135. aiq/embedder/nim_embedder.py +0 -59
  136. aiq/embedder/openai_embedder.py +0 -43
  137. aiq/embedder/register.py +0 -24
  138. aiq/eval/__init__.py +0 -14
  139. aiq/eval/config.py +0 -60
  140. aiq/eval/dataset_handler/__init__.py +0 -0
  141. aiq/eval/dataset_handler/dataset_downloader.py +0 -106
  142. aiq/eval/dataset_handler/dataset_filter.py +0 -52
  143. aiq/eval/dataset_handler/dataset_handler.py +0 -254
  144. aiq/eval/evaluate.py +0 -506
  145. aiq/eval/evaluator/__init__.py +0 -14
  146. aiq/eval/evaluator/base_evaluator.py +0 -73
  147. aiq/eval/evaluator/evaluator_model.py +0 -45
  148. aiq/eval/intermediate_step_adapter.py +0 -99
  149. aiq/eval/rag_evaluator/__init__.py +0 -0
  150. aiq/eval/rag_evaluator/evaluate.py +0 -178
  151. aiq/eval/rag_evaluator/register.py +0 -143
  152. aiq/eval/register.py +0 -23
  153. aiq/eval/remote_workflow.py +0 -133
  154. aiq/eval/runners/__init__.py +0 -14
  155. aiq/eval/runners/config.py +0 -39
  156. aiq/eval/runners/multi_eval_runner.py +0 -54
  157. aiq/eval/runtime_event_subscriber.py +0 -52
  158. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  159. aiq/eval/swe_bench_evaluator/evaluate.py +0 -215
  160. aiq/eval/swe_bench_evaluator/register.py +0 -36
  161. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  162. aiq/eval/trajectory_evaluator/evaluate.py +0 -75
  163. aiq/eval/trajectory_evaluator/register.py +0 -40
  164. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  165. aiq/eval/tunable_rag_evaluator/evaluate.py +0 -245
  166. aiq/eval/tunable_rag_evaluator/register.py +0 -52
  167. aiq/eval/usage_stats.py +0 -41
  168. aiq/eval/utils/__init__.py +0 -0
  169. aiq/eval/utils/output_uploader.py +0 -140
  170. aiq/eval/utils/tqdm_position_registry.py +0 -40
  171. aiq/eval/utils/weave_eval.py +0 -184
  172. aiq/experimental/__init__.py +0 -0
  173. aiq/experimental/decorators/__init__.py +0 -0
  174. aiq/experimental/decorators/experimental_warning_decorator.py +0 -130
  175. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  176. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  177. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +0 -147
  178. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +0 -204
  179. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +0 -107
  180. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  181. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +0 -105
  182. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +0 -205
  183. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +0 -146
  184. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +0 -224
  185. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  186. aiq/experimental/inference_time_scaling/models/editor_config.py +0 -132
  187. aiq/experimental/inference_time_scaling/models/its_item.py +0 -48
  188. aiq/experimental/inference_time_scaling/models/scoring_config.py +0 -112
  189. aiq/experimental/inference_time_scaling/models/search_config.py +0 -120
  190. aiq/experimental/inference_time_scaling/models/selection_config.py +0 -154
  191. aiq/experimental/inference_time_scaling/models/stage_enums.py +0 -43
  192. aiq/experimental/inference_time_scaling/models/strategy_base.py +0 -66
  193. aiq/experimental/inference_time_scaling/models/tool_use_config.py +0 -41
  194. aiq/experimental/inference_time_scaling/register.py +0 -36
  195. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  196. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +0 -168
  197. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +0 -168
  198. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +0 -111
  199. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  200. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +0 -128
  201. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +0 -122
  202. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +0 -128
  203. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  204. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +0 -63
  205. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +0 -131
  206. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +0 -159
  207. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +0 -128
  208. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +0 -58
  209. aiq/front_ends/__init__.py +0 -14
  210. aiq/front_ends/console/__init__.py +0 -14
  211. aiq/front_ends/console/authentication_flow_handler.py +0 -233
  212. aiq/front_ends/console/console_front_end_config.py +0 -32
  213. aiq/front_ends/console/console_front_end_plugin.py +0 -96
  214. aiq/front_ends/console/register.py +0 -25
  215. aiq/front_ends/cron/__init__.py +0 -14
  216. aiq/front_ends/fastapi/__init__.py +0 -14
  217. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  218. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +0 -27
  219. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +0 -107
  220. aiq/front_ends/fastapi/fastapi_front_end_config.py +0 -234
  221. aiq/front_ends/fastapi/fastapi_front_end_controller.py +0 -68
  222. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +0 -116
  223. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +0 -1092
  224. aiq/front_ends/fastapi/html_snippets/__init__.py +0 -14
  225. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +0 -35
  226. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +0 -80
  227. aiq/front_ends/fastapi/job_store.py +0 -183
  228. aiq/front_ends/fastapi/main.py +0 -72
  229. aiq/front_ends/fastapi/message_handler.py +0 -298
  230. aiq/front_ends/fastapi/message_validator.py +0 -345
  231. aiq/front_ends/fastapi/register.py +0 -25
  232. aiq/front_ends/fastapi/response_helpers.py +0 -195
  233. aiq/front_ends/fastapi/step_adaptor.py +0 -321
  234. aiq/front_ends/mcp/__init__.py +0 -14
  235. aiq/front_ends/mcp/mcp_front_end_config.py +0 -32
  236. aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
  237. aiq/front_ends/mcp/register.py +0 -27
  238. aiq/front_ends/mcp/tool_converter.py +0 -242
  239. aiq/front_ends/register.py +0 -22
  240. aiq/front_ends/simple_base/__init__.py +0 -14
  241. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +0 -54
  242. aiq/llm/__init__.py +0 -0
  243. aiq/llm/aws_bedrock_llm.py +0 -57
  244. aiq/llm/nim_llm.py +0 -46
  245. aiq/llm/openai_llm.py +0 -46
  246. aiq/llm/register.py +0 -23
  247. aiq/llm/utils/__init__.py +0 -14
  248. aiq/llm/utils/env_config_value.py +0 -94
  249. aiq/llm/utils/error.py +0 -17
  250. aiq/memory/__init__.py +0 -20
  251. aiq/memory/interfaces.py +0 -183
  252. aiq/memory/models.py +0 -112
  253. aiq/meta/module_to_distro.json +0 -3
  254. aiq/meta/pypi.md +0 -58
  255. aiq/object_store/__init__.py +0 -20
  256. aiq/object_store/in_memory_object_store.py +0 -76
  257. aiq/object_store/interfaces.py +0 -84
  258. aiq/object_store/models.py +0 -36
  259. aiq/object_store/register.py +0 -20
  260. aiq/observability/__init__.py +0 -14
  261. aiq/observability/exporter/__init__.py +0 -14
  262. aiq/observability/exporter/base_exporter.py +0 -449
  263. aiq/observability/exporter/exporter.py +0 -78
  264. aiq/observability/exporter/file_exporter.py +0 -33
  265. aiq/observability/exporter/processing_exporter.py +0 -322
  266. aiq/observability/exporter/raw_exporter.py +0 -52
  267. aiq/observability/exporter/span_exporter.py +0 -265
  268. aiq/observability/exporter_manager.py +0 -335
  269. aiq/observability/mixin/__init__.py +0 -14
  270. aiq/observability/mixin/batch_config_mixin.py +0 -26
  271. aiq/observability/mixin/collector_config_mixin.py +0 -23
  272. aiq/observability/mixin/file_mixin.py +0 -288
  273. aiq/observability/mixin/file_mode.py +0 -23
  274. aiq/observability/mixin/resource_conflict_mixin.py +0 -134
  275. aiq/observability/mixin/serialize_mixin.py +0 -61
  276. aiq/observability/mixin/type_introspection_mixin.py +0 -183
  277. aiq/observability/processor/__init__.py +0 -14
  278. aiq/observability/processor/batching_processor.py +0 -309
  279. aiq/observability/processor/callback_processor.py +0 -42
  280. aiq/observability/processor/intermediate_step_serializer.py +0 -28
  281. aiq/observability/processor/processor.py +0 -71
  282. aiq/observability/register.py +0 -96
  283. aiq/observability/utils/__init__.py +0 -14
  284. aiq/observability/utils/dict_utils.py +0 -236
  285. aiq/observability/utils/time_utils.py +0 -31
  286. aiq/plugins/.namespace +0 -1
  287. aiq/profiler/__init__.py +0 -0
  288. aiq/profiler/calc/__init__.py +0 -14
  289. aiq/profiler/calc/calc_runner.py +0 -627
  290. aiq/profiler/calc/calculations.py +0 -288
  291. aiq/profiler/calc/data_models.py +0 -188
  292. aiq/profiler/calc/plot.py +0 -345
  293. aiq/profiler/callbacks/__init__.py +0 -0
  294. aiq/profiler/callbacks/agno_callback_handler.py +0 -295
  295. aiq/profiler/callbacks/base_callback_class.py +0 -20
  296. aiq/profiler/callbacks/langchain_callback_handler.py +0 -290
  297. aiq/profiler/callbacks/llama_index_callback_handler.py +0 -205
  298. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +0 -238
  299. aiq/profiler/callbacks/token_usage_base_model.py +0 -27
  300. aiq/profiler/data_frame_row.py +0 -51
  301. aiq/profiler/data_models.py +0 -24
  302. aiq/profiler/decorators/__init__.py +0 -0
  303. aiq/profiler/decorators/framework_wrapper.py +0 -131
  304. aiq/profiler/decorators/function_tracking.py +0 -254
  305. aiq/profiler/forecasting/__init__.py +0 -0
  306. aiq/profiler/forecasting/config.py +0 -18
  307. aiq/profiler/forecasting/model_trainer.py +0 -75
  308. aiq/profiler/forecasting/models/__init__.py +0 -22
  309. aiq/profiler/forecasting/models/forecasting_base_model.py +0 -40
  310. aiq/profiler/forecasting/models/linear_model.py +0 -196
  311. aiq/profiler/forecasting/models/random_forest_regressor.py +0 -268
  312. aiq/profiler/inference_metrics_model.py +0 -28
  313. aiq/profiler/inference_optimization/__init__.py +0 -0
  314. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  315. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +0 -460
  316. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +0 -258
  317. aiq/profiler/inference_optimization/data_models.py +0 -386
  318. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  319. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +0 -468
  320. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +0 -405
  321. aiq/profiler/inference_optimization/llm_metrics.py +0 -212
  322. aiq/profiler/inference_optimization/prompt_caching.py +0 -163
  323. aiq/profiler/inference_optimization/token_uniqueness.py +0 -107
  324. aiq/profiler/inference_optimization/workflow_runtimes.py +0 -72
  325. aiq/profiler/intermediate_property_adapter.py +0 -102
  326. aiq/profiler/profile_runner.py +0 -473
  327. aiq/profiler/utils.py +0 -184
  328. aiq/registry_handlers/__init__.py +0 -0
  329. aiq/registry_handlers/local/__init__.py +0 -0
  330. aiq/registry_handlers/local/local_handler.py +0 -176
  331. aiq/registry_handlers/local/register_local.py +0 -37
  332. aiq/registry_handlers/metadata_factory.py +0 -60
  333. aiq/registry_handlers/package_utils.py +0 -567
  334. aiq/registry_handlers/pypi/__init__.py +0 -0
  335. aiq/registry_handlers/pypi/pypi_handler.py +0 -251
  336. aiq/registry_handlers/pypi/register_pypi.py +0 -40
  337. aiq/registry_handlers/register.py +0 -21
  338. aiq/registry_handlers/registry_handler_base.py +0 -157
  339. aiq/registry_handlers/rest/__init__.py +0 -0
  340. aiq/registry_handlers/rest/register_rest.py +0 -56
  341. aiq/registry_handlers/rest/rest_handler.py +0 -237
  342. aiq/registry_handlers/schemas/__init__.py +0 -0
  343. aiq/registry_handlers/schemas/headers.py +0 -42
  344. aiq/registry_handlers/schemas/package.py +0 -68
  345. aiq/registry_handlers/schemas/publish.py +0 -63
  346. aiq/registry_handlers/schemas/pull.py +0 -82
  347. aiq/registry_handlers/schemas/remove.py +0 -36
  348. aiq/registry_handlers/schemas/search.py +0 -91
  349. aiq/registry_handlers/schemas/status.py +0 -47
  350. aiq/retriever/__init__.py +0 -0
  351. aiq/retriever/interface.py +0 -37
  352. aiq/retriever/milvus/__init__.py +0 -14
  353. aiq/retriever/milvus/register.py +0 -81
  354. aiq/retriever/milvus/retriever.py +0 -228
  355. aiq/retriever/models.py +0 -74
  356. aiq/retriever/nemo_retriever/__init__.py +0 -14
  357. aiq/retriever/nemo_retriever/register.py +0 -60
  358. aiq/retriever/nemo_retriever/retriever.py +0 -190
  359. aiq/retriever/register.py +0 -22
  360. aiq/runtime/__init__.py +0 -14
  361. aiq/runtime/loader.py +0 -215
  362. aiq/runtime/runner.py +0 -190
  363. aiq/runtime/session.py +0 -158
  364. aiq/runtime/user_metadata.py +0 -130
  365. aiq/settings/__init__.py +0 -0
  366. aiq/settings/global_settings.py +0 -318
  367. aiq/test/.namespace +0 -1
  368. aiq/tool/__init__.py +0 -0
  369. aiq/tool/chat_completion.py +0 -74
  370. aiq/tool/code_execution/README.md +0 -151
  371. aiq/tool/code_execution/__init__.py +0 -0
  372. aiq/tool/code_execution/code_sandbox.py +0 -267
  373. aiq/tool/code_execution/local_sandbox/.gitignore +0 -1
  374. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -60
  375. aiq/tool/code_execution/local_sandbox/__init__.py +0 -13
  376. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -198
  377. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -6
  378. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -50
  379. aiq/tool/code_execution/register.py +0 -74
  380. aiq/tool/code_execution/test_code_execution_sandbox.py +0 -414
  381. aiq/tool/code_execution/utils.py +0 -100
  382. aiq/tool/datetime_tools.py +0 -42
  383. aiq/tool/document_search.py +0 -141
  384. aiq/tool/github_tools/__init__.py +0 -0
  385. aiq/tool/github_tools/create_github_commit.py +0 -133
  386. aiq/tool/github_tools/create_github_issue.py +0 -87
  387. aiq/tool/github_tools/create_github_pr.py +0 -106
  388. aiq/tool/github_tools/get_github_file.py +0 -106
  389. aiq/tool/github_tools/get_github_issue.py +0 -166
  390. aiq/tool/github_tools/get_github_pr.py +0 -256
  391. aiq/tool/github_tools/update_github_issue.py +0 -100
  392. aiq/tool/mcp/__init__.py +0 -14
  393. aiq/tool/mcp/exceptions.py +0 -142
  394. aiq/tool/mcp/mcp_client.py +0 -255
  395. aiq/tool/mcp/mcp_tool.py +0 -96
  396. aiq/tool/memory_tools/__init__.py +0 -0
  397. aiq/tool/memory_tools/add_memory_tool.py +0 -79
  398. aiq/tool/memory_tools/delete_memory_tool.py +0 -67
  399. aiq/tool/memory_tools/get_memory_tool.py +0 -72
  400. aiq/tool/nvidia_rag.py +0 -95
  401. aiq/tool/register.py +0 -38
  402. aiq/tool/retriever.py +0 -89
  403. aiq/tool/server_tools.py +0 -66
  404. aiq/utils/__init__.py +0 -0
  405. aiq/utils/data_models/__init__.py +0 -0
  406. aiq/utils/data_models/schema_validator.py +0 -58
  407. aiq/utils/debugging_utils.py +0 -43
  408. aiq/utils/dump_distro_mapping.py +0 -32
  409. aiq/utils/exception_handlers/__init__.py +0 -0
  410. aiq/utils/exception_handlers/automatic_retries.py +0 -289
  411. aiq/utils/exception_handlers/mcp.py +0 -211
  412. aiq/utils/exception_handlers/schemas.py +0 -114
  413. aiq/utils/io/__init__.py +0 -0
  414. aiq/utils/io/model_processing.py +0 -28
  415. aiq/utils/io/yaml_tools.py +0 -119
  416. aiq/utils/log_utils.py +0 -37
  417. aiq/utils/metadata_utils.py +0 -74
  418. aiq/utils/optional_imports.py +0 -142
  419. aiq/utils/producer_consumer_queue.py +0 -178
  420. aiq/utils/reactive/__init__.py +0 -0
  421. aiq/utils/reactive/base/__init__.py +0 -0
  422. aiq/utils/reactive/base/observable_base.py +0 -65
  423. aiq/utils/reactive/base/observer_base.py +0 -55
  424. aiq/utils/reactive/base/subject_base.py +0 -79
  425. aiq/utils/reactive/observable.py +0 -59
  426. aiq/utils/reactive/observer.py +0 -76
  427. aiq/utils/reactive/subject.py +0 -131
  428. aiq/utils/reactive/subscription.py +0 -49
  429. aiq/utils/settings/__init__.py +0 -0
  430. aiq/utils/settings/global_settings.py +0 -197
  431. aiq/utils/string_utils.py +0 -38
  432. aiq/utils/type_converter.py +0 -290
  433. aiq/utils/type_utils.py +0 -484
  434. aiq/utils/url_utils.py +0 -27
  435. aiqtoolkit-1.2.0rc4.dist-info/METADATA +0 -363
  436. aiqtoolkit-1.2.0rc4.dist-info/RECORD +0 -438
  437. aiqtoolkit-1.2.0rc4.dist-info/entry_points.txt +0 -20
  438. aiqtoolkit-1.2.0rc4.dist-info/licenses/LICENSE-3rd-party.txt +0 -3686
  439. aiqtoolkit-1.2.0rc4.dist-info/licenses/LICENSE.md +0 -201
  440. aiqtoolkit-1.2.0rc4.dist-info/top_level.txt +0 -1
  441. {aiqtoolkit-1.2.0rc4.dist-info → aiqtoolkit-1.2.0rc5.dist-info}/WHEEL +0 -0
aiq/eval/evaluate.py DELETED
@@ -1,506 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import asyncio
17
- import logging
18
- import shutil
19
- from pathlib import Path
20
- from typing import Any
21
- from uuid import uuid4
22
-
23
- from pydantic import BaseModel
24
- from tqdm import tqdm
25
-
26
- from aiq.data_models.evaluate import EvalConfig
27
- from aiq.data_models.evaluate import JobEvictionPolicy
28
- from aiq.eval.config import EvaluationRunConfig
29
- from aiq.eval.config import EvaluationRunOutput
30
- from aiq.eval.dataset_handler.dataset_handler import DatasetHandler
31
- from aiq.eval.evaluator.evaluator_model import EvalInput
32
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
33
- from aiq.eval.evaluator.evaluator_model import EvalOutput
34
- from aiq.eval.usage_stats import UsageStats
35
- from aiq.eval.usage_stats import UsageStatsItem
36
- from aiq.eval.usage_stats import UsageStatsLLM
37
- from aiq.eval.utils.output_uploader import OutputUploader
38
- from aiq.eval.utils.weave_eval import WeaveEvaluationIntegration
39
- from aiq.profiler.data_models import ProfilerResults
40
- from aiq.runtime.session import AIQSessionManager
41
-
42
- logger = logging.getLogger(__name__)
43
-
44
-
45
- class EvaluationRun: # pylint: disable=too-many-public-methods
46
- """
47
- Instantiated for each evaluation run and used to store data for that single run.
48
- """
49
-
50
def __init__(self, config: EvaluationRunConfig):
    """
    Set up the state that belongs to one evaluation run.

    Args:
        config: Run-level settings, stored unchanged on ``self.config``.
    """
    # Imported when the constructor runs rather than at module import time
    from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

    # The run configuration is known now; the evaluation config starts out unset
    self.config: EvaluationRunConfig = config
    self.eval_config: EvalConfig | None = None

    # Helper objects used while the workflow runs
    self.intermediate_step_adapter: IntermediateStepAdapter = IntermediateStepAdapter()
    self.weave_eval: WeaveEvaluationIntegration = WeaveEvaluationIntegration()

    # Evaluation input (unset at construction) and the flag that records a failed workflow run
    self.eval_input: EvalInput | None = None
    self.workflow_interrupted: bool = False

    # One (evaluator_name, EvalOutput) tuple per evaluator result
    self.evaluation_results: list[tuple[str, EvalOutput]] = []

    # Per-item usage statistics gathered during the run
    self.usage_stats: UsageStats = UsageStats()

    # Output locations: the workflow output file and the evaluator output files
    self.workflow_output_file: Path | None = None
    self.evaluator_output_files: list[Path] = []
78
-
79
def _compute_usage_stats(self, item: EvalInputItem):
    """
    Derive token usage, runtime and LLM latency for one item from its trajectory.

    The result is stored in ``self.usage_stats.usage_stats_items`` under ``item.id``
    and is also returned.

    Args:
        item: Evaluation item whose ``trajectory`` holds the intermediate steps.

    Returns:
        The ``UsageStatsItem`` recorded for ``item.id``.
    """
    from aiq.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor

    adapted_steps = [IntermediatePropertyAdaptor.from_intermediate_step(raw_step) for raw_step in item.trajectory]

    # Token counts come from LLM_END events, accumulated per LLM name and overall
    per_llm = {}
    token_total = 0
    for adapted in adapted_steps:
        if adapted.event_type != "LLM_END":
            continue
        name = adapted.llm_name
        if name not in per_llm:
            per_llm[name] = UsageStatsLLM()
        llm_stats = per_llm[name]
        llm_stats.prompt_tokens += adapted.token_usage.prompt_tokens
        llm_stats.completion_tokens += adapted.token_usage.completion_tokens
        llm_stats.total_tokens += adapted.token_usage.total_tokens
        token_total += adapted.token_usage.total_tokens

    # Runtime is the span between the earliest and latest event timestamps
    if item.trajectory:
        timestamps = [raw_step.event_timestamp for raw_step in item.trajectory]
        first_ts = min(timestamps)
        last_ts = max(timestamps)
        elapsed = last_ts - first_ts
    else:
        first_ts = 0.0
        last_ts = 0.0
        elapsed = 0.0

    # Pair each LLM_END with the most recent unmatched LLM_START to get one latency per call
    latencies = []
    pending_start = None
    for adapted in adapted_steps:
        if adapted.event_type == "LLM_START":
            pending_start = adapted.event_timestamp
        elif adapted.event_type == "LLM_END" and pending_start is not None:
            latencies.append(adapted.event_timestamp - pending_start)
            pending_start = None

    # The reported latency is the 95th percentile of all calls, or 0.0 when there were none
    p95_latency = 0.0
    if latencies:
        import numpy as np
        p95_latency = float(np.percentile(latencies, 95))

    self.usage_stats.usage_stats_items[item.id] = UsageStatsItem(usage_stats_per_llm=per_llm,
                                                                 runtime=elapsed,
                                                                 total_tokens=token_total,
                                                                 min_timestamp=first_ts,
                                                                 max_timestamp=last_ts,
                                                                 llm_latency=p95_latency)
    return self.usage_stats.usage_stats_items[item.id]
131
-
132
async def run_workflow_local(self, session_manager: AIQSessionManager):
    """
    Run the workflow locally for the evaluation input items and record each output.

    For every item the workflow result and the intermediate steps are awaited. The
    output is then extracted: pydantic models are dumped to JSON, anything else is
    looked up with the jsonpath in ``self.config.result_json_path``. The output and
    the validated trajectory are stored on the item, and the prediction and usage
    stats are logged through ``self.weave_eval``.

    If a workflow run fails with an exception other than ``NotImplementedError``, the
    failure is logged, ``self.workflow_interrupted`` is set and items that have not
    started yet are skipped.

    Args:
        session_manager: Session manager used to start one workflow run per item.

    Raises:
        NotImplementedError: If the workflow does not have a single output.
        RuntimeError: If the jsonpath expression matches nothing in the output.
    """
    # import function level dependencies
    from jsonpath_ng import parse

    from aiq.eval.runtime_event_subscriber import pull_intermediate

    jsonpath_expr = parse(self.config.result_json_path)
    stop_event = asyncio.Event()

    async def run_one(item: EvalInputItem) -> None:
        # An earlier failure requested a stop; leave this item untouched
        if stop_event.is_set():
            return

        async with session_manager.run(item.input_obj) as runner:
            if not session_manager.workflow.has_single_output:
                # raise an error if the workflow has multiple outputs
                raise NotImplementedError("Multiple outputs are not supported")

            runner_result = None
            intermediate_future = None

            try:
                # Create both awaitables before awaiting either, so the intermediate
                # step collection is set up before the workflow result is awaited
                intermediate_future = pull_intermediate()
                runner_result = runner.result()
                base_output = await runner_result
                intermediate_steps = await intermediate_future
            except NotImplementedError:
                # propagate unchanged, keeping the original traceback
                raise
            except Exception as e:
                # logger.exception already attaches the active traceback
                logger.exception("Failed to run the workflow: %s", e)
                # stop processing if a workflow error occurs
                self.workflow_interrupted = True

                # Cancel any coroutines that are still running, avoiding a warning about unawaited coroutines
                # (typically one of these two is what raised the exception and the other is still running)
                for coro in (runner_result, intermediate_future):
                    if coro is not None:
                        asyncio.ensure_future(coro).cancel()

                stop_event.set()
                return

            try:
                base_output = runner.convert(base_output, to_type=str)
            except ValueError:
                # Best effort: keep the unconverted output when conversion to str fails
                pass

            # if base_output is a pydantic model dump it to json
            if isinstance(base_output, BaseModel):
                output = base_output.model_dump_json(indent=2)
            else:
                m = jsonpath_expr.find(base_output)
                if not m:
                    raise RuntimeError(f"Failed to extract output using jsonpath: {self.config.result_json_path}")
                if len(m) > 1:
                    logger.warning("Multiple matches found for jsonpath at row '%s'. Matches: %s. Using the first",
                                   base_output,
                                   m)
                output = m[0].value

            item.output_obj = output
            item.trajectory = self.intermediate_step_adapter.validate_intermediate_steps(intermediate_steps)
            usage_stats_item = self._compute_usage_stats(item)

            self.weave_eval.log_prediction(item, output)
            await self.weave_eval.log_usage_stats(item, usage_stats_item)

    async def wrapped_run(item: EvalInputItem) -> None:
        # pbar is bound below, before any of these coroutines starts running
        await run_one(item)
        pbar.update(1)

    # if self.config.skip_completed_entries is set, skip eval_input_items with a non-empty output_obj
    if self.config.skip_completed_entries:
        eval_input_items = [item for item in self.eval_input.eval_input_items if not item.output_obj]
        if not eval_input_items:
            logger.warning("All items have a non-empty output. Skipping workflow pass altogether.")
            return
    else:
        eval_input_items = self.eval_input.eval_input_items

    pbar = tqdm(total=len(eval_input_items), desc="Running workflow")
    try:
        await asyncio.gather(*[wrapped_run(item) for item in eval_input_items])
    finally:
        # Close the progress bar even when one of the runs raised
        pbar.close()
221
-
222
async def run_workflow_remote(self):
    """Run the workflow through the remote handler, then log every item's results.

    Execution is delegated to ``EvaluationRemoteWorkflowHandler``. Afterwards, for
    each eval input item, usage stats are computed and both the prediction and the
    usage stats are logged through ``self.weave_eval``.
    """
    # Function-level import of the remote handler
    from aiq.eval.remote_workflow import EvaluationRemoteWorkflowHandler

    remote_handler = EvaluationRemoteWorkflowHandler(self.config, self.eval_config.general.max_concurrency)
    await remote_handler.run_workflow_remote(self.eval_input)

    for eval_item in self.eval_input.eval_input_items:
        item_stats = self._compute_usage_stats(eval_item)
        self.weave_eval.log_prediction(eval_item, eval_item.output_obj)
        await self.weave_eval.log_usage_stats(eval_item, item_stats)
230
-
231
async def profile_workflow(self) -> ProfilerResults:
    """
    Profile the trajectories collected for the dataset.

    Returns an empty ``ProfilerResults`` when no profiler is configured; otherwise
    returns the result of ``ProfilerRunner.run`` on the list of per-item trajectories.
    """
    if not self.eval_config.general.profiler:
        logger.info("Profiler is not enabled. Skipping profiling.")
        return ProfilerResults()

    # Function-level import of the profiler runner
    from aiq.profiler.profile_runner import ProfilerRunner

    # One trajectory per eval input item, in dataset order
    trajectories = [entry.trajectory for entry in self.eval_input.eval_input_items]

    runner = ProfilerRunner(self.eval_config.general.profiler,
                            self.eval_config.general.output_dir,
                            write_output=self.config.write_output)
    return await runner.run(trajectories)
251
-
252
def cleanup_output_directory(self):
    '''Remove stale contents of the evaluation output directory.

    Behaviour, in order:

    * no output config, or the output directory does not exist: nothing to do;
    * ``cleanup`` is set: the whole output directory is removed;
    * ``job_management.max_jobs`` is 0: no eviction is performed;
    * otherwise the oldest sub-directories of ``<output dir>/jobs`` are removed
      until at most ``max_jobs`` remain.

    A failure while deleting one job directory is logged and does not stop the
    remaining deletions.
    '''
    output_config = self.eval_config.general.output
    # Check the config before touching its attributes: reading ``.dir`` first
    # raised AttributeError whenever no output section was configured.
    if not output_config:
        return

    output_dir = output_config.dir
    if not output_dir.exists():
        return

    # If cleanup is true, remove the entire directory and we are done
    if output_config.cleanup:
        logger.info("Cleaning up entire output directory: %s", output_dir)
        shutil.rmtree(output_dir)
        return

    job_management = output_config.job_management
    max_jobs = job_management.max_jobs
    if max_jobs == 0:
        # No eviction policy
        return

    base_dir = output_dir / "jobs"
    if not base_dir.exists():
        return

    # Get all subdirectories, which represent individual job runs
    job_dirs = [d for d in base_dir.iterdir() if d.is_dir()]
    if len(job_dirs) <= max_jobs:
        return

    # Determine sort key based on eviction_policy; every policy other than
    # TIME_MODIFIED falls back to st_ctime.
    if job_management.eviction_policy == JobEvictionPolicy.TIME_MODIFIED:

        def sort_key(x):
            return x.stat().st_mtime

        logger.info("Using last modified time for job eviction policy.")
    else:

        def sort_key(x):
            # NOTE: st_ctime is the creation time on Windows only; on Unix it is
            # the time of the most recent metadata change.
            return x.stat().st_ctime

        logger.info("Using creation time for job eviction policy.")

    # Sort directories (oldest first)
    job_dirs.sort(key=sort_key)
    num_to_delete = len(job_dirs) - max_jobs

    logger.info("Found %d jobs, exceeding limit of %d. Removing %d oldest jobs.",
                len(job_dirs),
                max_jobs,
                num_to_delete)

    for dir_to_delete in job_dirs[:num_to_delete]:
        try:
            logger.info("Deleting old job directory: %s", dir_to_delete)
            shutil.rmtree(dir_to_delete)
        except Exception as e:
            # Best effort: keep evicting the remaining directories.
            # logger.exception already attaches the traceback.
            logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e)
308
-
309
def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
    """Write the workflow output and one JSON file per evaluator into the output directory.

    The workflow output goes to ``workflow_output.json`` and each evaluator's result to
    ``<evaluator_name>_output.json``. The written paths are recorded on
    ``self.workflow_output_file`` and appended to ``self.evaluator_output_files``.
    ``profiler_results`` is accepted but not used here.
    """
    workflow_path = self.eval_config.general.output_dir / "workflow_output.json"
    workflow_path.parent.mkdir(parents=True, exist_ok=True)

    # Write the workflow output to a file (this can be used for re-running the evaluation)
    output_config = self.eval_config.general.output
    if output_config:
        step_filter = output_config.workflow_output_step_filter
    else:
        step_filter = None
    serialized_workflow = dataset_handler.publish_eval_input(self.eval_input, step_filter)
    with workflow_path.open("w", encoding="utf-8") as handle:
        # The handler returns the content already serialized; write it as-is
        handle.write(serialized_workflow)
    self.workflow_output_file = workflow_path
    logger.info("Workflow output written to %s", workflow_path)

    # Write the output of each evaluator to a separate json file
    for evaluator_name, eval_output in self.evaluation_results:
        evaluator_path = self.eval_config.general.output_dir / f"{evaluator_name}_output.json"
        evaluator_path.parent.mkdir(parents=True, exist_ok=True)
        # create json content using the evaluation results
        serialized_result = eval_output.model_dump_json(indent=2)
        with evaluator_path.open("w", encoding="utf-8") as handle:
            handle.write(serialized_result)
        self.evaluator_output_files.append(evaluator_path)
        logger.info("Evaluation results written to %s", evaluator_path)
334
-
335
def publish_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
    """Publish the output.

    Writes the output files when ``self.config.write_output`` is set, warns when the
    workflow was interrupted, and always logs the summary through ``self.weave_eval``.
    """
    should_write = self.config.write_output
    if should_write:
        self.write_output(dataset_handler, profiler_results)

    if self.workflow_interrupted:
        # Issue a warning if the workflow was not completed on all datasets
        logger.warning("Workflow execution was interrupted due to an error. The results may be incomplete. "
                       "You can re-execute evaluation for incomplete results by running "
                       "`eval` with the --skip_completed_entries flag.")

    self.weave_eval.log_summary(self.usage_stats, self.evaluation_results, profiler_results)
348
-
349
async def run_single_evaluator(self, evaluator_name: str, evaluator: Any):
    """Run one evaluator, record its result and log its score.

    Any exception raised by the evaluator or by the score logging is logged and
    swallowed, so the failure does not propagate to the caller.
    """
    try:
        result = await evaluator.evaluate_fn(self.eval_input)
        entry = (evaluator_name, result)
        self.evaluation_results.append(entry)

        await self.weave_eval.alog_score(result, evaluator_name)
    except Exception as err:
        logger.exception("An error occurred while running evaluator %s: %s", evaluator_name, err, exc_info=True)
358
-
359
async def run_evaluators(self, evaluators: dict[str, Any]):
    """Run every non-empty evaluator concurrently.

    Falsy evaluator entries are skipped. When nothing is left to run, a warning is
    logged and the method returns without finishing the Weave loggers. Otherwise the
    loggers are finished whether or not the gathered evaluators raised.
    """
    pending = []
    for name, evaluator in evaluators.items():
        if evaluator:
            pending.append(self.run_single_evaluator(name, evaluator))

    if len(pending) == 0:
        logger.warning("All evaluators were empty or invalid.")
        return

    try:
        await asyncio.gather(*pending)
    except Exception as err:
        logger.exception("An error occurred while running evaluators: %s", err, exc_info=True)
        raise
    finally:
        # Finish prediction loggers in Weave
        await self.weave_eval.afinish_loggers()
375
-
376
def apply_overrides(self):
    """Load the config file with ``self.config.override`` applied and validate it as an ``AIQConfig``.

    Config-object plugins are registered first, before the schema validation runs.
    """
    from aiq.cli.cli_utils.config_override import load_and_override_config
    from aiq.data_models.config import AIQConfig
    from aiq.runtime.loader import PluginTypes
    from aiq.runtime.loader import discover_and_register_plugins
    from aiq.utils.data_models.schema_validator import validate_schema

    # Register plugins before validation
    discover_and_register_plugins(PluginTypes.CONFIG_OBJECT)

    overridden = load_and_override_config(self.config.config_file, self.config.override)
    return validate_schema(overridden, AIQConfig)
389
-
390
- def _get_workflow_alias(self, workflow_type: str | None = None):
391
- """Get the workflow alias for displaying in evaluation UI."""
392
- if self.eval_config.general.workflow_alias:
393
- return self.eval_config.general.workflow_alias
394
-
395
- if not workflow_type or workflow_type == "EmptyFunctionConfig":
396
- return "aiqtoolkit-eval"
397
-
398
- return workflow_type
399
-
400
async def run_and_evaluate(self,
                           session_manager: AIQSessionManager | None = None,
                           job_id: str | None = None) -> EvaluationRunOutput:
    """
    Run the workflow with the specified config file and evaluate the dataset.

    Steps: load the config (with overrides when given), clean up the output directory,
    switch to a per-job output directory when a job id is supplied or generated, load the
    dataset, run the workflow (remote endpoint or local session), run the evaluators,
    profile, publish, and finally run custom scripts and upload the output directory.

    Returns early, with only the output file lists and the interrupted flag filled in,
    when no dataset is configured or the dataset is empty.
    """
    logger.info("Starting evaluation run with config file: %s", self.config.config_file)

    from aiq.builder.eval_builder import WorkflowEvalBuilder
    from aiq.runtime.loader import load_config

    def nothing_evaluated():
        # Shared result for the two early exits below
        return EvaluationRunOutput(
            workflow_output_file=self.workflow_output_file,
            evaluator_output_files=self.evaluator_output_files,
            workflow_interrupted=self.workflow_interrupted,
        )

    # Load and override the config
    config = self.apply_overrides() if self.config.override else load_config(self.config.config_file)
    self.eval_config = config.eval
    workflow_alias = self._get_workflow_alias(config.workflow.type)
    logger.debug("Loaded %s evaluation configuration: %s", workflow_alias, self.eval_config)

    general = self.eval_config.general

    # Cleanup the output directory
    if general.output:
        self.cleanup_output_directory()

    # Generate a job_id if append_job_id_to_output_dir is enabled and no job_id provided
    if general.output and general.output.job_management.append_job_id_to_output_dir and not job_id:
        job_id = f"job_{uuid4()}"
        logger.info("Generated job ID for output directory: %s", job_id)

    # If a job id is provided keep the data per-job
    if job_id:
        general.output_dir = general.output_dir / f"jobs/{job_id}"
        if general.output:
            general.output.dir = general.output_dir

    # Load the input dataset
    # For multiple datasets, one handler per dataset can be created
    dataset_config = general.dataset  # Currently only one dataset is supported
    if not dataset_config:
        logger.info("No dataset found, nothing to evaluate")
        return nothing_evaluated()

    dataset_handler = DatasetHandler(dataset_config=dataset_config,
                                     reps=self.config.reps,
                                     concurrency=general.max_concurrency,
                                     num_passes=self.config.num_passes,
                                     adjust_dataset_size=self.config.adjust_dataset_size)
    self.eval_input = dataset_handler.get_eval_input_from_dataset(self.config.dataset)
    if not self.eval_input.eval_input_items:
        logger.info("Dataset is empty. Nothing to evaluate.")
        return nothing_evaluated()

    # Run workflow and evaluate
    async with WorkflowEvalBuilder.from_config(config=config) as eval_workflow:
        # Initialize Weave integration
        self.weave_eval.initialize_logger(workflow_alias, self.eval_input, config)

        # Run workflow: remote endpoint if configured, otherwise locally unless skipped
        if self.config.endpoint:
            await self.run_workflow_remote()
        elif not self.config.skip_workflow:
            if session_manager is None:
                session_manager = AIQSessionManager(eval_workflow.build(),
                                                    max_concurrency=general.max_concurrency)
            await self.run_workflow_local(session_manager)

        # Evaluate
        evaluators = {}
        for name in self.eval_config.evaluators:
            evaluators[name] = eval_workflow.get_evaluator(name)
        await self.run_evaluators(evaluators)

    # Profile the workflow
    profiler_results = await self.profile_workflow()

    # compute total runtime: latest max_timestamp minus earliest min_timestamp
    if self.usage_stats.usage_stats_items:
        latest_item = max(self.usage_stats.usage_stats_items.values(), key=lambda x: x.max_timestamp)
        earliest_item = min(self.usage_stats.usage_stats_items.values(), key=lambda x: x.min_timestamp)
        self.usage_stats.total_runtime = latest_item.max_timestamp - earliest_item.min_timestamp
    else:
        self.usage_stats.total_runtime = 0.0

    # Publish the results
    self.publish_output(dataset_handler, profiler_results)

    # Run custom scripts and upload evaluation outputs to S3
    if general.output:
        uploader = OutputUploader(general.output, job_id=job_id)
        uploader.run_custom_scripts()
        await uploader.upload_directory()

    return EvaluationRunOutput(workflow_output_file=self.workflow_output_file,
                               evaluator_output_files=self.evaluator_output_files,
                               workflow_interrupted=self.workflow_interrupted,
                               eval_input=self.eval_input,
                               evaluation_results=self.evaluation_results,
                               usage_stats=self.usage_stats,
                               profiler_results=profiler_results)
@@ -1,14 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
@@ -1,73 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import asyncio
17
- from abc import ABC
18
- from abc import abstractmethod
19
-
20
- from tqdm import tqdm
21
-
22
- from aiq.eval.evaluator.evaluator_model import EvalInput
23
- from aiq.eval.evaluator.evaluator_model import EvalInputItem
24
- from aiq.eval.evaluator.evaluator_model import EvalOutput
25
- from aiq.eval.evaluator.evaluator_model import EvalOutputItem
26
- from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
27
-
28
-
29
class BaseEvaluator(ABC):
    """
    Base class for custom evaluators.

    Each custom evaluator must implement the `evaluate_item` method which is used to evaluate a
    single EvalInputItem.
    """

    def __init__(self, max_concurrency: int = 4, tqdm_desc: str = "Evaluating"):
        """
        Args:
            max_concurrency: maximum number of items evaluated at the same time.
            tqdm_desc: description shown on the progress bar.
        """
        self.max_concurrency = max_concurrency
        self.semaphore = asyncio.Semaphore(max_concurrency)
        self.tqdm_desc = tqdm_desc

    @abstractmethod
    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:
        """Each evaluator must implement this for item-level evaluation"""
        pass

    async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
        """
        Evaluate every item of `eval_input` concurrently and aggregate the results.

        An item whose evaluation raises is reported with a score of 0.0 and the error text
        as reasoning. The average is computed over the numeric scores only, rounded to two
        decimals, and is None when there is no numeric score.
        """
        pbar = None
        tqdm_position = None
        position_claimed = False
        try:
            tqdm_position = TqdmPositionRegistry.claim()
            position_claimed = True
            pbar = tqdm(total=len(eval_input.eval_input_items), desc=self.tqdm_desc, position=tqdm_position)

            async def wrapped(item):
                # The semaphore caps the number of concurrent evaluate_item calls
                async with self.semaphore:
                    try:
                        output_item = await self.evaluate_item(item)
                        pbar.update(1)
                        return output_item
                    except Exception as e:
                        # If the evaluator fails, return an error item with a score of 0.0
                        pbar.update(1)
                        return EvalOutputItem(id=item.id, score=0.0, reasoning={"error": f"Evaluator error: {str(e)}"})

            output_items = await asyncio.gather(*[wrapped(item) for item in eval_input.eval_input_items])
        finally:
            # Only release what was actually acquired, so that a failure while claiming the
            # position or creating the bar is not masked by an error raised from this cleanup.
            if pbar is not None:
                pbar.close()
            if position_claimed:
                TqdmPositionRegistry.release(tqdm_position)

        # Compute average if possible
        numeric_scores = [item.score for item in output_items if isinstance(item.score, (int, float))]
        avg_score = round(sum(numeric_scores) / len(numeric_scores), 2) if numeric_scores else None

        return EvalOutput(average_score=avg_score, eval_output_items=output_items)
@@ -1,45 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- import typing
17
-
18
- from pydantic import BaseModel
19
-
20
- from aiq.data_models.intermediate_step import IntermediateStep
21
-
22
-
23
class EvalInputItem(BaseModel):
    # One dataset entry together with what the workflow produced for it.
    id: typing.Any
    # Value handed to the workflow as its input
    input_obj: typing.Any
    expected_output_obj: typing.Any
    # Output extracted from the workflow run; falsy while the entry has not been completed
    output_obj: typing.Any
    expected_trajectory: list[IntermediateStep]
    # Intermediate steps recorded for the entry
    trajectory: list[IntermediateStep]
    # presumably the raw dataset row this item was built from -- TODO confirm
    full_dataset_entry: typing.Any
31
-
32
-
33
class EvalInput(BaseModel):
    # Container for all the items handed to the workflow and to the evaluators.
    eval_input_items: list[EvalInputItem]
35
-
36
-
37
class EvalOutputItem(BaseModel):
    # Result of evaluating a single EvalInputItem.
    id: typing.Any  # id or input_obj from EvalInputItem
    score: typing.Any  # float or any serializable type
    # Explanation attached to the score
    reasoning: typing.Any
41
-
42
-
43
class EvalOutput(BaseModel):
    # Aggregated result of one evaluator over the whole EvalInput.
    average_score: typing.Any  # float or any serializable type
    # Per-item results
    eval_output_items: list[EvalOutputItem]