aiqtoolkit 1.1.0a20250515__py3-none-any.whl → 1.1.0a20251020__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiqtoolkit might be problematic.
Files changed (319)
  1. aiqtoolkit-1.1.0a20251020.dist-info/METADATA +37 -0
  2. aiqtoolkit-1.1.0a20251020.dist-info/RECORD +4 -0
  3. {aiqtoolkit-1.1.0a20250515.dist-info → aiqtoolkit-1.1.0a20251020.dist-info}/WHEEL +1 -1
  4. aiqtoolkit-1.1.0a20251020.dist-info/top_level.txt +1 -0
  5. aiq/agent/__init__.py +0 -0
  6. aiq/agent/base.py +0 -76
  7. aiq/agent/dual_node.py +0 -67
  8. aiq/agent/react_agent/__init__.py +0 -0
  9. aiq/agent/react_agent/agent.py +0 -322
  10. aiq/agent/react_agent/output_parser.py +0 -104
  11. aiq/agent/react_agent/prompt.py +0 -46
  12. aiq/agent/react_agent/register.py +0 -148
  13. aiq/agent/reasoning_agent/__init__.py +0 -0
  14. aiq/agent/reasoning_agent/reasoning_agent.py +0 -224
  15. aiq/agent/register.py +0 -23
  16. aiq/agent/rewoo_agent/__init__.py +0 -0
  17. aiq/agent/rewoo_agent/agent.py +0 -410
  18. aiq/agent/rewoo_agent/prompt.py +0 -108
  19. aiq/agent/rewoo_agent/register.py +0 -158
  20. aiq/agent/tool_calling_agent/__init__.py +0 -0
  21. aiq/agent/tool_calling_agent/agent.py +0 -123
  22. aiq/agent/tool_calling_agent/register.py +0 -105
  23. aiq/builder/__init__.py +0 -0
  24. aiq/builder/builder.py +0 -223
  25. aiq/builder/component_utils.py +0 -303
  26. aiq/builder/context.py +0 -227
  27. aiq/builder/embedder.py +0 -24
  28. aiq/builder/eval_builder.py +0 -120
  29. aiq/builder/evaluator.py +0 -29
  30. aiq/builder/framework_enum.py +0 -24
  31. aiq/builder/front_end.py +0 -73
  32. aiq/builder/function.py +0 -297
  33. aiq/builder/function_base.py +0 -376
  34. aiq/builder/function_info.py +0 -627
  35. aiq/builder/intermediate_step_manager.py +0 -135
  36. aiq/builder/llm.py +0 -25
  37. aiq/builder/retriever.py +0 -25
  38. aiq/builder/user_interaction_manager.py +0 -71
  39. aiq/builder/workflow.py +0 -143
  40. aiq/builder/workflow_builder.py +0 -757
  41. aiq/cli/__init__.py +0 -14
  42. aiq/cli/cli_utils/__init__.py +0 -0
  43. aiq/cli/cli_utils/config_override.py +0 -231
  44. aiq/cli/cli_utils/validation.py +0 -37
  45. aiq/cli/commands/__init__.py +0 -0
  46. aiq/cli/commands/configure/__init__.py +0 -0
  47. aiq/cli/commands/configure/channel/__init__.py +0 -0
  48. aiq/cli/commands/configure/channel/add.py +0 -28
  49. aiq/cli/commands/configure/channel/channel.py +0 -36
  50. aiq/cli/commands/configure/channel/remove.py +0 -30
  51. aiq/cli/commands/configure/channel/update.py +0 -30
  52. aiq/cli/commands/configure/configure.py +0 -33
  53. aiq/cli/commands/evaluate.py +0 -139
  54. aiq/cli/commands/info/__init__.py +0 -14
  55. aiq/cli/commands/info/info.py +0 -39
  56. aiq/cli/commands/info/list_channels.py +0 -32
  57. aiq/cli/commands/info/list_components.py +0 -129
  58. aiq/cli/commands/info/list_mcp.py +0 -126
  59. aiq/cli/commands/registry/__init__.py +0 -14
  60. aiq/cli/commands/registry/publish.py +0 -88
  61. aiq/cli/commands/registry/pull.py +0 -118
  62. aiq/cli/commands/registry/registry.py +0 -38
  63. aiq/cli/commands/registry/remove.py +0 -108
  64. aiq/cli/commands/registry/search.py +0 -155
  65. aiq/cli/commands/start.py +0 -250
  66. aiq/cli/commands/uninstall.py +0 -83
  67. aiq/cli/commands/validate.py +0 -47
  68. aiq/cli/commands/workflow/__init__.py +0 -14
  69. aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  70. aiq/cli/commands/workflow/templates/config.yml.j2 +0 -16
  71. aiq/cli/commands/workflow/templates/pyproject.toml.j2 +0 -22
  72. aiq/cli/commands/workflow/templates/register.py.j2 +0 -5
  73. aiq/cli/commands/workflow/templates/workflow.py.j2 +0 -36
  74. aiq/cli/commands/workflow/workflow.py +0 -37
  75. aiq/cli/commands/workflow/workflow_commands.py +0 -313
  76. aiq/cli/entrypoint.py +0 -133
  77. aiq/cli/main.py +0 -44
  78. aiq/cli/register_workflow.py +0 -408
  79. aiq/cli/type_registry.py +0 -879
  80. aiq/data_models/__init__.py +0 -14
  81. aiq/data_models/api_server.py +0 -588
  82. aiq/data_models/common.py +0 -143
  83. aiq/data_models/component.py +0 -46
  84. aiq/data_models/component_ref.py +0 -135
  85. aiq/data_models/config.py +0 -349
  86. aiq/data_models/dataset_handler.py +0 -122
  87. aiq/data_models/discovery_metadata.py +0 -286
  88. aiq/data_models/embedder.py +0 -26
  89. aiq/data_models/evaluate.py +0 -104
  90. aiq/data_models/evaluator.py +0 -26
  91. aiq/data_models/front_end.py +0 -26
  92. aiq/data_models/function.py +0 -30
  93. aiq/data_models/function_dependencies.py +0 -64
  94. aiq/data_models/interactive.py +0 -237
  95. aiq/data_models/intermediate_step.py +0 -269
  96. aiq/data_models/invocation_node.py +0 -38
  97. aiq/data_models/llm.py +0 -26
  98. aiq/data_models/logging.py +0 -26
  99. aiq/data_models/memory.py +0 -26
  100. aiq/data_models/profiler.py +0 -53
  101. aiq/data_models/registry_handler.py +0 -26
  102. aiq/data_models/retriever.py +0 -30
  103. aiq/data_models/step_adaptor.py +0 -64
  104. aiq/data_models/streaming.py +0 -33
  105. aiq/data_models/swe_bench_model.py +0 -54
  106. aiq/data_models/telemetry_exporter.py +0 -26
  107. aiq/embedder/__init__.py +0 -0
  108. aiq/embedder/langchain_client.py +0 -41
  109. aiq/embedder/nim_embedder.py +0 -58
  110. aiq/embedder/openai_embedder.py +0 -42
  111. aiq/embedder/register.py +0 -24
  112. aiq/eval/__init__.py +0 -14
  113. aiq/eval/config.py +0 -42
  114. aiq/eval/dataset_handler/__init__.py +0 -0
  115. aiq/eval/dataset_handler/dataset_downloader.py +0 -106
  116. aiq/eval/dataset_handler/dataset_filter.py +0 -52
  117. aiq/eval/dataset_handler/dataset_handler.py +0 -169
  118. aiq/eval/evaluate.py +0 -325
  119. aiq/eval/evaluator/__init__.py +0 -14
  120. aiq/eval/evaluator/evaluator_model.py +0 -44
  121. aiq/eval/intermediate_step_adapter.py +0 -93
  122. aiq/eval/rag_evaluator/__init__.py +0 -0
  123. aiq/eval/rag_evaluator/evaluate.py +0 -138
  124. aiq/eval/rag_evaluator/register.py +0 -138
  125. aiq/eval/register.py +0 -23
  126. aiq/eval/remote_workflow.py +0 -128
  127. aiq/eval/runtime_event_subscriber.py +0 -52
  128. aiq/eval/swe_bench_evaluator/__init__.py +0 -0
  129. aiq/eval/swe_bench_evaluator/evaluate.py +0 -215
  130. aiq/eval/swe_bench_evaluator/register.py +0 -36
  131. aiq/eval/trajectory_evaluator/__init__.py +0 -0
  132. aiq/eval/trajectory_evaluator/evaluate.py +0 -118
  133. aiq/eval/trajectory_evaluator/register.py +0 -40
  134. aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
  135. aiq/eval/tunable_rag_evaluator/evaluate.py +0 -263
  136. aiq/eval/tunable_rag_evaluator/register.py +0 -50
  137. aiq/eval/utils/__init__.py +0 -0
  138. aiq/eval/utils/output_uploader.py +0 -131
  139. aiq/eval/utils/tqdm_position_registry.py +0 -40
  140. aiq/front_ends/__init__.py +0 -14
  141. aiq/front_ends/console/__init__.py +0 -14
  142. aiq/front_ends/console/console_front_end_config.py +0 -32
  143. aiq/front_ends/console/console_front_end_plugin.py +0 -107
  144. aiq/front_ends/console/register.py +0 -25
  145. aiq/front_ends/cron/__init__.py +0 -14
  146. aiq/front_ends/fastapi/__init__.py +0 -14
  147. aiq/front_ends/fastapi/fastapi_front_end_config.py +0 -150
  148. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +0 -103
  149. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +0 -607
  150. aiq/front_ends/fastapi/intermediate_steps_subscriber.py +0 -80
  151. aiq/front_ends/fastapi/job_store.py +0 -161
  152. aiq/front_ends/fastapi/main.py +0 -70
  153. aiq/front_ends/fastapi/message_handler.py +0 -279
  154. aiq/front_ends/fastapi/message_validator.py +0 -345
  155. aiq/front_ends/fastapi/register.py +0 -25
  156. aiq/front_ends/fastapi/response_helpers.py +0 -195
  157. aiq/front_ends/fastapi/step_adaptor.py +0 -320
  158. aiq/front_ends/fastapi/websocket.py +0 -148
  159. aiq/front_ends/mcp/__init__.py +0 -14
  160. aiq/front_ends/mcp/mcp_front_end_config.py +0 -32
  161. aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
  162. aiq/front_ends/mcp/register.py +0 -27
  163. aiq/front_ends/mcp/tool_converter.py +0 -242
  164. aiq/front_ends/register.py +0 -22
  165. aiq/front_ends/simple_base/__init__.py +0 -14
  166. aiq/front_ends/simple_base/simple_front_end_plugin_base.py +0 -52
  167. aiq/llm/__init__.py +0 -0
  168. aiq/llm/nim_llm.py +0 -45
  169. aiq/llm/openai_llm.py +0 -45
  170. aiq/llm/register.py +0 -22
  171. aiq/llm/utils/__init__.py +0 -14
  172. aiq/llm/utils/env_config_value.py +0 -94
  173. aiq/llm/utils/error.py +0 -17
  174. aiq/memory/__init__.py +0 -20
  175. aiq/memory/interfaces.py +0 -183
  176. aiq/memory/models.py +0 -112
  177. aiq/meta/module_to_distro.json +0 -3
  178. aiq/meta/pypi.md +0 -58
  179. aiq/observability/__init__.py +0 -0
  180. aiq/observability/async_otel_listener.py +0 -429
  181. aiq/observability/register.py +0 -99
  182. aiq/plugins/.namespace +0 -1
  183. aiq/profiler/__init__.py +0 -0
  184. aiq/profiler/callbacks/__init__.py +0 -0
  185. aiq/profiler/callbacks/agno_callback_handler.py +0 -295
  186. aiq/profiler/callbacks/base_callback_class.py +0 -20
  187. aiq/profiler/callbacks/langchain_callback_handler.py +0 -278
  188. aiq/profiler/callbacks/llama_index_callback_handler.py +0 -205
  189. aiq/profiler/callbacks/semantic_kernel_callback_handler.py +0 -238
  190. aiq/profiler/callbacks/token_usage_base_model.py +0 -27
  191. aiq/profiler/data_frame_row.py +0 -51
  192. aiq/profiler/decorators/__init__.py +0 -0
  193. aiq/profiler/decorators/framework_wrapper.py +0 -131
  194. aiq/profiler/decorators/function_tracking.py +0 -254
  195. aiq/profiler/forecasting/__init__.py +0 -0
  196. aiq/profiler/forecasting/config.py +0 -18
  197. aiq/profiler/forecasting/model_trainer.py +0 -75
  198. aiq/profiler/forecasting/models/__init__.py +0 -22
  199. aiq/profiler/forecasting/models/forecasting_base_model.py +0 -40
  200. aiq/profiler/forecasting/models/linear_model.py +0 -196
  201. aiq/profiler/forecasting/models/random_forest_regressor.py +0 -268
  202. aiq/profiler/inference_metrics_model.py +0 -25
  203. aiq/profiler/inference_optimization/__init__.py +0 -0
  204. aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  205. aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +0 -452
  206. aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +0 -258
  207. aiq/profiler/inference_optimization/data_models.py +0 -386
  208. aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
  209. aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +0 -468
  210. aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +0 -405
  211. aiq/profiler/inference_optimization/llm_metrics.py +0 -212
  212. aiq/profiler/inference_optimization/prompt_caching.py +0 -163
  213. aiq/profiler/inference_optimization/token_uniqueness.py +0 -107
  214. aiq/profiler/inference_optimization/workflow_runtimes.py +0 -72
  215. aiq/profiler/intermediate_property_adapter.py +0 -102
  216. aiq/profiler/profile_runner.py +0 -433
  217. aiq/profiler/utils.py +0 -184
  218. aiq/registry_handlers/__init__.py +0 -0
  219. aiq/registry_handlers/local/__init__.py +0 -0
  220. aiq/registry_handlers/local/local_handler.py +0 -176
  221. aiq/registry_handlers/local/register_local.py +0 -37
  222. aiq/registry_handlers/metadata_factory.py +0 -60
  223. aiq/registry_handlers/package_utils.py +0 -198
  224. aiq/registry_handlers/pypi/__init__.py +0 -0
  225. aiq/registry_handlers/pypi/pypi_handler.py +0 -251
  226. aiq/registry_handlers/pypi/register_pypi.py +0 -40
  227. aiq/registry_handlers/register.py +0 -21
  228. aiq/registry_handlers/registry_handler_base.py +0 -157
  229. aiq/registry_handlers/rest/__init__.py +0 -0
  230. aiq/registry_handlers/rest/register_rest.py +0 -56
  231. aiq/registry_handlers/rest/rest_handler.py +0 -237
  232. aiq/registry_handlers/schemas/__init__.py +0 -0
  233. aiq/registry_handlers/schemas/headers.py +0 -42
  234. aiq/registry_handlers/schemas/package.py +0 -68
  235. aiq/registry_handlers/schemas/publish.py +0 -63
  236. aiq/registry_handlers/schemas/pull.py +0 -82
  237. aiq/registry_handlers/schemas/remove.py +0 -36
  238. aiq/registry_handlers/schemas/search.py +0 -91
  239. aiq/registry_handlers/schemas/status.py +0 -47
  240. aiq/retriever/__init__.py +0 -0
  241. aiq/retriever/interface.py +0 -37
  242. aiq/retriever/milvus/__init__.py +0 -14
  243. aiq/retriever/milvus/register.py +0 -81
  244. aiq/retriever/milvus/retriever.py +0 -228
  245. aiq/retriever/models.py +0 -74
  246. aiq/retriever/nemo_retriever/__init__.py +0 -14
  247. aiq/retriever/nemo_retriever/register.py +0 -60
  248. aiq/retriever/nemo_retriever/retriever.py +0 -190
  249. aiq/retriever/register.py +0 -22
  250. aiq/runtime/__init__.py +0 -14
  251. aiq/runtime/loader.py +0 -188
  252. aiq/runtime/runner.py +0 -176
  253. aiq/runtime/session.py +0 -140
  254. aiq/runtime/user_metadata.py +0 -131
  255. aiq/settings/__init__.py +0 -0
  256. aiq/settings/global_settings.py +0 -318
  257. aiq/test/.namespace +0 -1
  258. aiq/tool/__init__.py +0 -0
  259. aiq/tool/code_execution/__init__.py +0 -0
  260. aiq/tool/code_execution/code_sandbox.py +0 -188
  261. aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -60
  262. aiq/tool/code_execution/local_sandbox/__init__.py +0 -13
  263. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -83
  264. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -4
  265. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -25
  266. aiq/tool/code_execution/register.py +0 -70
  267. aiq/tool/code_execution/utils.py +0 -100
  268. aiq/tool/datetime_tools.py +0 -42
  269. aiq/tool/document_search.py +0 -141
  270. aiq/tool/github_tools/__init__.py +0 -0
  271. aiq/tool/github_tools/create_github_commit.py +0 -133
  272. aiq/tool/github_tools/create_github_issue.py +0 -87
  273. aiq/tool/github_tools/create_github_pr.py +0 -106
  274. aiq/tool/github_tools/get_github_file.py +0 -106
  275. aiq/tool/github_tools/get_github_issue.py +0 -166
  276. aiq/tool/github_tools/get_github_pr.py +0 -256
  277. aiq/tool/github_tools/update_github_issue.py +0 -100
  278. aiq/tool/mcp/__init__.py +0 -14
  279. aiq/tool/mcp/mcp_client.py +0 -220
  280. aiq/tool/mcp/mcp_tool.py +0 -95
  281. aiq/tool/memory_tools/__init__.py +0 -0
  282. aiq/tool/memory_tools/add_memory_tool.py +0 -79
  283. aiq/tool/memory_tools/delete_memory_tool.py +0 -67
  284. aiq/tool/memory_tools/get_memory_tool.py +0 -72
  285. aiq/tool/nvidia_rag.py +0 -95
  286. aiq/tool/register.py +0 -37
  287. aiq/tool/retriever.py +0 -89
  288. aiq/tool/server_tools.py +0 -63
  289. aiq/utils/__init__.py +0 -0
  290. aiq/utils/data_models/__init__.py +0 -0
  291. aiq/utils/data_models/schema_validator.py +0 -58
  292. aiq/utils/debugging_utils.py +0 -43
  293. aiq/utils/exception_handlers/__init__.py +0 -0
  294. aiq/utils/exception_handlers/schemas.py +0 -114
  295. aiq/utils/io/__init__.py +0 -0
  296. aiq/utils/io/yaml_tools.py +0 -119
  297. aiq/utils/metadata_utils.py +0 -74
  298. aiq/utils/optional_imports.py +0 -142
  299. aiq/utils/producer_consumer_queue.py +0 -178
  300. aiq/utils/reactive/__init__.py +0 -0
  301. aiq/utils/reactive/base/__init__.py +0 -0
  302. aiq/utils/reactive/base/observable_base.py +0 -65
  303. aiq/utils/reactive/base/observer_base.py +0 -55
  304. aiq/utils/reactive/base/subject_base.py +0 -79
  305. aiq/utils/reactive/observable.py +0 -59
  306. aiq/utils/reactive/observer.py +0 -76
  307. aiq/utils/reactive/subject.py +0 -131
  308. aiq/utils/reactive/subscription.py +0 -49
  309. aiq/utils/settings/__init__.py +0 -0
  310. aiq/utils/settings/global_settings.py +0 -197
  311. aiq/utils/type_converter.py +0 -232
  312. aiq/utils/type_utils.py +0 -397
  313. aiq/utils/url_utils.py +0 -27
  314. aiqtoolkit-1.1.0a20250515.dist-info/METADATA +0 -331
  315. aiqtoolkit-1.1.0a20250515.dist-info/RECORD +0 -316
  316. aiqtoolkit-1.1.0a20250515.dist-info/entry_points.txt +0 -17
  317. aiqtoolkit-1.1.0a20250515.dist-info/licenses/LICENSE-3rd-party.txt +0 -3686
  318. aiqtoolkit-1.1.0a20250515.dist-info/licenses/LICENSE.md +0 -201
  319. aiqtoolkit-1.1.0a20250515.dist-info/top_level.txt +0 -1
aiq/profiler/profile_runner.py DELETED
@@ -1,433 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import json
- import logging
- import math
- import os
- import statistics
- from pathlib import Path
- from typing import Any
-
- from pydantic import BaseModel
-
- from aiq.data_models.evaluate import ProfilerConfig
- from aiq.data_models.intermediate_step import IntermediateStep
- from aiq.profiler.forecasting.model_trainer import ModelTrainer
- from aiq.profiler.inference_metrics_model import InferenceMetricsModel
- from aiq.profiler.utils import create_standardized_dataframe
- from aiq.utils.type_converter import TypeConverter
-
- logger = logging.getLogger(__name__)
-
-
- class SimpleMetricsHolder(BaseModel):
-     workflow_run_time_confidence_intervals: Any
-     llm_latency_confidence_intervals: Any
-     throughput_estimate_confidence_interval: Any
-
-
- class InferenceOptimizationHolder(BaseModel):
-     confidence_intervals: SimpleMetricsHolder
-     common_prefixes: Any
-     token_uniqueness: Any
-     workflow_runtimes: Any
-
-
- class ProfilerRunner:
-     """
-     A utility to run a series of prompts through an AIQ Toolkit workflow for profiling:
-
-     - can load prompts from a file
-     - or generate them via an LLM
-     - collect usage stats for each run
-     - store them in a configured directory
-
-     Updated version with additional metrics:
-
-     - For each request, we collect a list of UsageStatistic objects, store them individually,
-       and also keep a final large JSON of all requests.
-     - We then compute:
-       1. 90, 95, 99% confidence intervals for the mean total workflow run time.
-       2. 90, 95, 99% confidence intervals for the mean LLM latency.
-       3. 90, 95, 99% estimates of throughput.
-
-     All computed metrics are saved to a metrics JSON file at the end.
-     """
-
-     def __init__(self, profiler_config: ProfilerConfig, output_dir: Path):
-         self.profile_config = profiler_config
-         self.output_dir = output_dir
-         self._converter = TypeConverter([])
-
-         # Holds per-request data (prompt, output, usage_stats, etc.)
-         # This will be saved at the end to a big JSON file
-         self.all_requests_data: list[dict] = []
-         self.all_steps = []
-
-         # Ensure output directory
-         os.makedirs(output_dir, exist_ok=True)
-
-     async def run(self, all_steps: list[list[IntermediateStep]]):
-         """
-         Main entrypoint: Works on Input DataFrame generated from eval to fit forecasting model,
-         writes out combined requests JSON, then computes and saves additional metrics,
-         and optionally fits a forecasting model.
-         """
-         from aiq.profiler.inference_optimization.bottleneck_analysis.nested_stack_analysis import \
-             multi_example_call_profiling
-         from aiq.profiler.inference_optimization.bottleneck_analysis.simple_stack_analysis import \
-             profile_workflow_bottlenecks
-         from aiq.profiler.inference_optimization.experimental.concurrency_spike_analysis import \
-             concurrency_spike_analysis
-         from aiq.profiler.inference_optimization.experimental.prefix_span_analysis import \
-             prefixspan_subworkflow_with_text
-         from aiq.profiler.inference_optimization.llm_metrics import LLMMetrics
-         from aiq.profiler.inference_optimization.prompt_caching import get_common_prefixes
-         from aiq.profiler.inference_optimization.token_uniqueness import compute_inter_query_token_uniqueness_by_llm
-         from aiq.profiler.inference_optimization.workflow_runtimes import compute_workflow_runtime_metrics
-         from aiq.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
-
-         # Convert the incoming DataFrame to a list of dicts and store
-         all_steps = [[IntermediatePropertyAdaptor.from_intermediate_step(step) for step in steps]
-                      for steps in all_steps]  # Add adapter properties to each step
-
-         self.all_steps = all_steps
-         self.all_requests_data = []
-         for i, steps in enumerate(all_steps):
-             request_data = []
-             for step in steps:
-                 request_data.append(step.model_dump())
-             self.all_requests_data.append({"request_number": i, "intermediate_steps": request_data})
-
-         # Write the final big JSON (all requests)
-         final_path = os.path.join(self.output_dir, "all_requests_profiler_traces.json")
-         with open(final_path, 'w', encoding='utf-8') as f:
-             json.dump(self.all_requests_data, f, indent=2, default=str)
-         logger.info("Wrote combined data to: %s", final_path)
-
-         # ------------------------------------------------------------
-         # Generate one standardized dataframe for all usage stats
-         # ------------------------------------------------------------
-         merged_df = create_standardized_dataframe(all_steps)
-
-         if self.profile_config.compute_llm_metrics and not merged_df.empty:
-             merged_df = LLMMetrics.compute_profiling_metrics(all_steps)
-
-         output_df = merged_df.copy()
-
-         if self.profile_config.csv_exclude_io_text and not output_df.empty:
-             # Exclude text fields from CSV
-             output_df = output_df.drop(columns=['llm_text_input', 'llm_text_output', 'llm_new_token'])
-
-         # Write this single CSV
-         csv_path = os.path.join(self.output_dir, "standardized_data_all.csv")
-         output_df.to_csv(csv_path, index=False, encoding='utf-8')
-         logger.info("Wrote merged standardized DataFrame to %s", csv_path)
-
-         # ------------------------------------------------------------
-         # Compute and save additional performance metrics
-         # ------------------------------------------------------------
-         workflow_run_time_ci: InferenceMetricsModel = self._compute_workflow_run_time_confidence_intervals()
-
-         # 2. 90, 95, 99% confidence intervals of mean LLM latency
-         llm_latency_ci: InferenceMetricsModel = self._compute_llm_latency_confidence_intervals()
-
-         # 3. 90, 95, 99% estimates of throughput
-         throughput_ci: InferenceMetricsModel = self._compute_throughput_estimates()
-
-         # Collect all computed metrics
-         simple_metrics = SimpleMetricsHolder(workflow_run_time_confidence_intervals=workflow_run_time_ci.model_dump(),
-                                              llm_latency_confidence_intervals=llm_latency_ci.model_dump(),
-                                              throughput_estimate_confidence_interval=throughput_ci.model_dump())
-
-         common_prefix_results = token_uniqueness_results = workflow_runtimes_results = None
-
-         if self.profile_config.prompt_caching_prefixes.enable:
-             # ------------------------------------------------------------
-             # Compute and save common prefixes
-             # ------------------------------------------------------------
-
-             prefixes = get_common_prefixes(all_steps, self.profile_config.prompt_caching_prefixes.min_frequency)
-             common_prefix_results = prefixes
-
-         if self.profile_config.token_uniqueness_forecast:
-             # ------------------------------------------------------------
-             # Compute and save inter-query token uniqueness
-             # ------------------------------------------------------------
-
-             uniqueness = compute_inter_query_token_uniqueness_by_llm(all_steps)
-             token_uniqueness_results = uniqueness
-
-         if self.profile_config.workflow_runtime_forecast:
-             # ------------------------------------------------------------
-             # Compute and save workflow runtime metrics
-             # ------------------------------------------------------------
-
-             workflow_runtimes = compute_workflow_runtime_metrics(all_steps)
-             workflow_runtimes_results = workflow_runtimes
-
-         inference_optimization_results = InferenceOptimizationHolder(confidence_intervals=simple_metrics,
-                                                                      common_prefixes=common_prefix_results,
-                                                                      token_uniqueness=token_uniqueness_results,
-                                                                      workflow_runtimes=workflow_runtimes_results)
-
-         if inference_optimization_results:
-             # Save to JSON
-             optimization_results_path = os.path.join(self.output_dir, "inference_optimization.json")
-             with open(optimization_results_path, 'w', encoding='utf-8') as f:
-                 json.dump(inference_optimization_results.model_dump(), f, indent=2)
-             logger.info("Wrote inference optimization results to: %s", optimization_results_path)
-
-         workflow_profiling_reports = ""
-         workflow_profiling_metrics = {}
-
-         if self.profile_config.bottleneck_analysis.enable_simple_stack:
-             # ------------------------------------------------------------
-             # Profile workflow bottlenecks
-             # ------------------------------------------------------------
-
-             workflow_bottlenecks = profile_workflow_bottlenecks(all_steps)
-             workflow_bottlenecks = workflow_bottlenecks.model_dump()
-             workflow_profiling_reports += "\n\n\n" + workflow_bottlenecks["summary"]
-             workflow_profiling_metrics["simple_stack_analysis"] = workflow_bottlenecks["stats"]
-             logger.info("Simple stack analysis complete")
-
-         if self.profile_config.bottleneck_analysis.enable_nested_stack:
-             # ------------------------------------------------------------
-             # Profile workflow bottlenecks with nested stack analysis
-             # ------------------------------------------------------------
-             nested_bottlenecks = multi_example_call_profiling(all_steps, output_dir=str(self.output_dir))
-             workflow_profiling_reports += "\n\n\n" + nested_bottlenecks.textual_report
-             workflow_profiling_metrics["nested_stack_analysis"] = nested_bottlenecks.model_dump(
-                 exclude=["textual_report"])
-             logger.info("Nested stack analysis complete")
-
-         if self.profile_config.concurrency_spike_analysis.enable:
-             # ------------------------------------------------------------
-             # Profile concurrency spikes
-             # ------------------------------------------------------------
-             concurrency_metrics = concurrency_spike_analysis(
-                 all_steps, self.profile_config.concurrency_spike_analysis.spike_threshold)
-             workflow_profiling_reports += "\n\n\n" + concurrency_metrics.textual_report
-             workflow_profiling_metrics["concurrency_spike_analysis"] = concurrency_metrics.model_dump(
-                 exclude=["textual_report"])
-             logger.info("Concurrency spike analysis complete")
-
-         if self.profile_config.prefix_span_analysis.enable:
-             # ------------------------------------------------------------
-             # Profile prefix span analysis
-             # ------------------------------------------------------------
-             prefix_list = []
-             if (self.profile_config.prefix_span_analysis.chain_with_common_prefixes
-                     and "common_prefixes" in inference_optimization_results):
-                 logger.info("Using common prefixes for prefix span analysis")
-                 for _, llm_data in inference_optimization_results["common_prefixes"].items():
-                     for prefix_data in llm_data["prefix_info"]:
-                         prefix_list.append(prefix_data["prefix"])
-
-             prefix_span_analysis = prefixspan_subworkflow_with_text(
-                 all_steps,
-                 **self.profile_config.prefix_span_analysis.model_dump(exclude=["enable", "chain_with_common_prefixes"]),
-                 prefix_list=prefix_list)
-
-             workflow_profiling_reports += "\n\n\n" + prefix_span_analysis.textual_report
-             workflow_profiling_metrics["prefix_span_analysis"] = prefix_span_analysis.model_dump(
-                 exclude=["textual_report"])
-             logger.info("Prefix span analysis complete")
-
-         if workflow_profiling_reports:
-             # Save to text file
-             profiling_report_path = os.path.join(self.output_dir, "workflow_profiling_report.txt")
-             with open(profiling_report_path, 'w', encoding='utf-8') as f:
-                 f.write(workflow_profiling_reports)
-             logger.info("Wrote workflow profiling report to: %s", profiling_report_path)
-
-         if workflow_profiling_metrics:
-             # Save to JSON
-             profiling_metrics_path = os.path.join(self.output_dir, "workflow_profiling_metrics.json")
-             with open(profiling_metrics_path, 'w', encoding='utf-8') as f:
-                 json.dump(workflow_profiling_metrics, f, indent=2)
-             logger.info("Wrote workflow profiling metrics to: %s", profiling_metrics_path)
-
-         if self.profile_config.token_usage_forecast:
-             # ------------------------------------------------------------
-             # Fit forecasting model and save
-             # ------------------------------------------------------------
-
-             logger.info("Fitting model for forecasting.")
-             model_trainer = ModelTrainer()
-
-             try:
-                 fitted_model = model_trainer.train(all_steps)
-                 logger.info("Fitted model for forecasting.")
-             except Exception as e:
-                 logger.exception("Fitting model failed. %s", e, exc_info=True)
-                 return
-
-             os.makedirs(self.output_dir, exist_ok=True)
-
-             import pickle
-             with open(os.path.join(self.output_dir, "fitted_model.pkl"), 'wb') as f:
-                 pickle.dump(fitted_model, f)
-
-             logger.info("Saved fitted model to disk.")
-
-     # -------------------------------------------------------------------
-     # Confidence Intervals / Metrics
-     # -------------------------------------------------------------------
-     def _compute_workflow_run_time_confidence_intervals(self) -> InferenceMetricsModel:
-         """
-         Computes 90, 95, 99% confidence intervals for the mean total workflow run time (in seconds).
-         The total workflow run time for each request is the difference between the last and first
-         event timestamps in usage_stats.
-         """
-         run_times = []
-         for req_data in self.all_steps:
-             # Find the min and max event_timestamp
-             timestamps = [u.event_timestamp for u in req_data]
-             if not timestamps:
-                 continue
-
-             start_time = min(timestamps)
-             end_time = max(timestamps)
-             run_times.append(end_time - start_time)
-
-         return self._compute_confidence_intervals(run_times, "Workflow Run Time")
-
-     def _compute_llm_latency_confidence_intervals(self) -> InferenceMetricsModel:
-         """
-         Computes 90, 95, 99% confidence intervals for the mean LLM latency.
-         LLM latency is defined as the difference between an LLM_END event_timestamp and
-         the immediately preceding LLM_START event_timestamp, across all usage_stats.
-         """
-         latencies = []
-         for req_data in self.all_steps:
-
-             usage_stats_sorted = sorted(req_data, key=lambda x: x.event_timestamp)
-
-             previous_llm_start_time = None
-             for u in usage_stats_sorted:
-                 event_type = u.event_type.value
-                 ts = u.event_timestamp
-                 if event_type == "LLM_START":
-                     previous_llm_start_time = ts
-                 elif event_type == "LLM_END" and previous_llm_start_time is not None:
-                     latencies.append(ts - previous_llm_start_time)
-                     previous_llm_start_time = None
-
-         return self._compute_confidence_intervals(latencies, "LLM Latency")
-
-     def _compute_throughput_estimates(self) -> InferenceMetricsModel:
-         """
-         Computes 90, 95, 99% confidence intervals for throughput, defined as:
-
-         | throughput = (total number of requests) / (total time window),
-
-         where total time window is from the earliest usage_stats event across all requests
-         to the latest usage_stats event.
-         Note: This is a simple approximate measure of overall throughput for the entire run.
-         """
-         # Gather min timestamp and max timestamp across ALL requests
-         all_timestamps = []
-         for req_data in self.all_steps:
-             for u in req_data:
-                 all_timestamps.append(u.event_timestamp)
-
-         if not all_timestamps:
-             return InferenceMetricsModel()
-
-         min_ts = min(all_timestamps)
-         max_ts = max(all_timestamps)
-         total_time = max_ts - min_ts
-         if total_time <= 0:
-             # Can't compute a meaningful throughput if time <= 0
-             return InferenceMetricsModel()
-
-         total_requests = len(self.all_requests_data)
-         # Single estimate of throughput
-         throughput_value = total_requests / total_time
-
-         # For confidence intervals of throughput, we do a simplistic assumption:
-         # We treat each request's contribution as 1 occurrence, and approximate
-         # the distribution as if these arrivals were uniform. This is quite simplified.
-         # We can compute a standard error: SE = sqrt(throughput_value / total_time)
-         # However, a more accurate approach might require a different method (e.g., Poisson).
-         # We'll do a naive normal approximation here.
-
-         # We'll guess that the standard deviation of #requests is sqrt(N), so stdev_n ~ sqrt(N).
-         # stdev_time is quite small though. We'll do a naive approach:
-         # We'll treat the throughput as a sample mean with n=total_requests.
-         # Then standard error is (throughput_value / sqrt(n)).
-         # This is purely heuristic.
-         n = total_requests
-         if n <= 1:
-             return InferenceMetricsModel()
-
-         # A rough standard error for throughput:
-         standard_error = throughput_value / math.sqrt(n)
-
-         # Build confidence intervals using z-scores for 90%, 95%, 99%
-         intervals = {'n': total_requests, 'mean': throughput_value}
-         for confidence, zvalue in \
-                 [("ninetieth_interval", 1.645), ("ninety_fifth_interval", 1.96), ("ninety_ninth_interval", 2.576)]:
-             ci_lower = throughput_value - zvalue * standard_error
-             ci_upper = throughput_value + zvalue * standard_error
-             intervals[confidence] = (max(ci_lower, 0.0), ci_upper)
-
-         return InferenceMetricsModel(**intervals)
-
-     def _compute_confidence_intervals(self, data: list[float], metric_name: str) -> InferenceMetricsModel:
-         """
-         Helper to compute 90, 95, 99% confidence intervals for the mean of a dataset.
-         Uses a z-score from the normal approximation for large samples.
-
-         Returns a dict like::
-
-             {
-                 'ninetieth_interval': (lower, upper),
-                 'ninety_fifth_interval': (lower, upper),
-                 'ninety_ninth_interval': (lower, upper),
-             }
-         """
-         if not data:
-             logger.warning("No data points for %s, cannot compute intervals.", metric_name)
-             return InferenceMetricsModel()
-
-         n = len(data)
-         mean_val = statistics.mean(data)
-         if n <= 1:
-             return InferenceMetricsModel(n=n,
-                                          mean=mean_val,
-                                          ninetieth_interval=(mean_val, mean_val),
-                                          ninety_fifth_interval=(mean_val, mean_val),
-                                          ninety_ninth_interval=(mean_val, mean_val))
-
-         stdev_val = statistics.pstdev(data)  # population stdev or use stdev for sample
-         # standard error
-         se = stdev_val / math.sqrt(n)
-
-         intervals = {}
-         for confidence, zvalue in \
-                 [("ninetieth_interval", 1.645), ("ninety_fifth_interval", 1.96), ("ninety_ninth_interval", 2.576)]:
-             margin = zvalue * se
-             lower = mean_val - margin
-             upper = mean_val + margin
-             intervals[confidence] = (lower, upper)
-
-         # Optionally, store more info
-         intervals["n"] = n
-         intervals["mean"] = mean_val
-         return InferenceMetricsModel(**intervals)
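
For reference, the `_compute_confidence_intervals` helper removed above applies a plain normal-approximation interval around the sample mean, mean ± z · (stdev / √n), with z = 1.645, 1.96 and 2.576 for the 90%, 95% and 99% levels. The following is a minimal standalone sketch of that calculation; it is not part of the package, and the function name and sample numbers are illustrative only.

```python
import math
import statistics

# z-scores for the three levels reported by the removed helper
Z_SCORES = {"90%": 1.645, "95%": 1.96, "99%": 2.576}


def normal_approx_intervals(data: list[float]) -> dict[str, tuple[float, float]]:
    """Mean +/- z * (stdev / sqrt(n)) for each confidence level (illustrative sketch)."""
    n = len(data)
    mean_val = statistics.mean(data)
    if n <= 1:
        # With one data point there is no spread to estimate
        return {label: (mean_val, mean_val) for label in Z_SCORES}
    se = statistics.pstdev(data) / math.sqrt(n)  # population stdev, as in the removed code
    return {label: (mean_val - z * se, mean_val + z * se) for label, z in Z_SCORES.items()}


# Example: per-request workflow run times in seconds (made-up numbers)
print(normal_approx_intervals([1.8, 2.1, 2.4, 1.9, 2.7, 2.2]))
```

As in the removed helper, the 99% band is the widest and all three intervals are centered on the same sample mean.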
aiq/profiler/utils.py DELETED
@@ -1,184 +0,0 @@
- # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- # SPDX-License-Identifier: Apache-2.0
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import inspect
- import logging
- import re
- from collections.abc import Callable
- from typing import Any
-
- import pandas as pd
-
- from aiq.builder.framework_enum import LLMFrameworkEnum
- from aiq.cli.type_registry import RegisteredFunctionInfo
- from aiq.data_models.intermediate_step import IntermediateStep
- from aiq.profiler.data_frame_row import DataFrameRow
-
- # A simple set of regex patterns to scan for direct references to LLMFrameworkEnum
- _FRAMEWORK_REGEX_MAP = {t: fr'\b{t._name_}\b' for t in LLMFrameworkEnum}
-
- logger = logging.getLogger(__name__)
-
-
- def detect_llm_frameworks_in_build_fn(registration: RegisteredFunctionInfo) -> list[LLMFrameworkEnum]:
-     """
-     Analyze a function's source (the build_fn) to see which LLM frameworks it uses. Also recurses
-     into any additional Python functions that the build_fn calls while passing `builder`, so that
-     references to LLMFrameworkEnum in those helper calls are also detected.
-
-     1. If `registration.framework_wrappers` is non-empty, we return that first.
-        (We do convert them to LLMFrameworkEnum if possible.)
-     2. Otherwise, we attempt to:
-
-        - Get the build_fn's source via `inspect.getsource(...)`
-        - Parse it for references to LLMFrameworkEnum
-        - Find any function calls that include the word "builder" in the arguments
-
-          - Recursively parse those functions' source code for frameworks
-
-     3. If we cannot parse the source at all (e.g. OSError), we return a list of all frameworks.
-     """
-     # ----------------------------------------------------------------
-     # 1) If frameworks were explicitly declared in registration.framework_wrappers, use them:
-     if registration.framework_wrappers:
-         results: list[LLMFrameworkEnum] = []
-         for fw_str in registration.framework_wrappers:
-             try:
-                 results.append(LLMFrameworkEnum(fw_str))
-             except ValueError:
-                 # If it's not recognized, ignore or log
-                 logger.warning("Unrecognized framework %s in registration.framework_wrappers", fw_str)
-
-         return list(set(results))  # unique
-     # ----------------------------------------------------------------
-
-     # Because we want to recursively parse code, we'll keep track of visited function objects
-     visited_fns: set[Callable[..., Any]] = set()
-     # We also need a place to store discovered frameworks
-     discovered: set[LLMFrameworkEnum] = set()
-
-     def _parse_source_for_frameworks(src: str) -> None:
-         """Check lines for any direct references to LLMFrameworkEnum.* or placeholders in the map."""
-         for fw_enum_member, pattern in _FRAMEWORK_REGEX_MAP.items():
-             if re.search(pattern, src):
-                 discovered.add(fw_enum_member)
-
-     def _find_builder_func_calls(src: str) -> list[str]:
-         """
-         Look for calls of the form: some_func(..., builder, ...)
-         or some_func(..., builder=..., ...)
-
-         This returns the name of each function we found being called, e.g. 'some_func'.
-         It's a naive best-effort approach
-         and group(1) is the function name.
-         """
-         # E.g. foo(builder) or foo( param=..., builder=builder )
-         pattern = r'(\w+)\s*\([^)]*\bbuilder\b[^)]*\)'
-         return re.findall(pattern, src)
-
-     def _recurse_parse(fn: Callable[..., Any], visited: set[Callable[..., Any]]) -> None:
-         """Recursively parse the source code of `fn`, add discovered frameworks,
-         and parse any new functions that get called with 'builder'."""
-         if fn in visited:
-             return
-         visited.add(fn)
-
-         try:
-             src = inspect.getsource(fn)
-         except OSError:
-             # If we can't parse source, we add all frameworks and bail
-             discovered.update([k for k, v in _FRAMEWORK_REGEX_MAP.items()])
-             return
-
-         # parse direct references
-         _parse_source_for_frameworks(src)
-
-         # parse any function calls that pass in "builder"
-         child_func_names = _find_builder_func_calls(src)
-         if not child_func_names:
-             return
-
-         # We'll try to find these child functions in the same module as `fn`
-         mod = inspect.getmodule(fn)
-         if not mod:
-             return
-         # We'll see if the child function is a top-level in that module
-         for child_name in child_func_names:
-             # get the function object if it exists in the module
-             child_obj = getattr(mod, child_name, None)
-             if callable(child_obj):
-                 _recurse_parse(child_obj, visited)
-
-     # ----------------------------------------------------------------
-     # 2) Actually do the BFS/DFS parse on `registration.build_fn`
-     main_fn = registration.build_fn
-
-     try:
-         _recurse_parse(main_fn, visited_fns)
-     except Exception:
-         # If an unexpected error occurs, fallback to "all frameworks"
-         discovered.update([k for k, v in _FRAMEWORK_REGEX_MAP.items()])
-     # ----------------------------------------------------------------
-     if len(discovered) > 0:
-         logger.warning(
-             "Discovered frameworks: %s in function %s by inspecting "
-             "source. It is recommended and more reliable to instead add the used LLMFrameworkEnum "
-             "types in the framework_wrappers argument when calling @register_function.",
-             discovered,
-             main_fn.__name__)
-
-     return list(discovered)
-
-
- # -------------------------------------------------------------------
- # Create a single standardized DataFrame for all usage stats
- # -------------------------------------------------------------------
- def create_standardized_dataframe(requests_data: list[list[IntermediateStep]]) -> pd.DataFrame:
-     """
-     Merge usage stats for *all* requests into one DataFrame, each row representing a usage_stats entry.
-     - Include a column 'example_number' to mark which request it originated from.
-     """
-     all_rows = []
-     try:
-         for i, steps in enumerate(requests_data):
-             for step in steps:
-                 # Create a DataFrameRow
-                 all_rows.append(
-                     DataFrameRow(event_timestamp=step.event_timestamp,
-                                  example_number=i,
-                                  prompt_tokens=step.token_usage.prompt_tokens,
-                                  completion_tokens=step.token_usage.completion_tokens,
-                                  total_tokens=step.token_usage.total_tokens,
-                                  llm_text_input=step.llm_text_input,
-                                  llm_text_output=step.llm_text_output,
-                                  llm_new_token=step.llm_text_chunk,
-                                  llm_name=step.llm_name,
-                                  tool_name=step.tool_name,
-                                  function_name=step.function_name,
-                                  function_id=step.function_id,
-                                  parent_function_name=step.parent_function_name,
-                                  parent_function_id=step.parent_function_id,
-                                  UUID=step.payload.UUID,
-                                  framework=step.framework,
-                                  event_type=step.event_type).model_dump(), )
-
-     except Exception as e:
-         logger.exception("Error creating standardized DataFrame: %s", e, exc_info=True)
-         return pd.DataFrame()
-
-     if not all_rows:
-         return pd.DataFrame()
-
-     return pd.DataFrame.from_records(all_rows)
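
The removed `detect_llm_frameworks_in_build_fn` works by regex-scanning a function's source for framework enum member names and falling back to "all frameworks" when the source cannot be read. Below is a toy standalone sketch of that idea, not taken from the package: the `Framework` enum, `frameworks_referenced` and `build_fn` are hypothetical stand-ins for the toolkit's `LLMFrameworkEnum` and registered build functions.

```python
import inspect
import re
from enum import Enum


class Framework(str, Enum):  # hypothetical stand-in for LLMFrameworkEnum
    LANGCHAIN = "langchain"
    LLAMA_INDEX = "llama_index"


# One word-boundary pattern per enum member name, mirroring the scan in the removed module
_PATTERNS = {member: rf"\b{member.name}\b" for member in Framework}


def frameworks_referenced(fn) -> set[Framework]:
    """Best-effort: read fn's source and report which enum member names it mentions."""
    try:
        src = inspect.getsource(fn)
    except OSError:
        return set(Framework)  # source unavailable: assume all frameworks, as the removed code does
    return {member for member, pattern in _PATTERNS.items() if re.search(pattern, src)}


def build_fn():
    framework = Framework.LANGCHAIN  # mentions LANGCHAIN by name
    return framework


print(frameworks_referenced(build_fn))  # expected: {Framework.LANGCHAIN}
```

Like the removed helper, this is a heuristic: it only sees textual references, which is why the toolkit recommends declaring frameworks explicitly via `framework_wrappers` instead.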