nvidia-nat 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435) hide show
  1. aiq/__init__.py +66 -0
  2. nat/agent/__init__.py +0 -0
  3. nat/agent/base.py +256 -0
  4. nat/agent/dual_node.py +67 -0
  5. nat/agent/react_agent/__init__.py +0 -0
  6. nat/agent/react_agent/agent.py +363 -0
  7. nat/agent/react_agent/output_parser.py +104 -0
  8. nat/agent/react_agent/prompt.py +44 -0
  9. nat/agent/react_agent/register.py +149 -0
  10. nat/agent/reasoning_agent/__init__.py +0 -0
  11. nat/agent/reasoning_agent/reasoning_agent.py +225 -0
  12. nat/agent/register.py +23 -0
  13. nat/agent/rewoo_agent/__init__.py +0 -0
  14. nat/agent/rewoo_agent/agent.py +415 -0
  15. nat/agent/rewoo_agent/prompt.py +110 -0
  16. nat/agent/rewoo_agent/register.py +157 -0
  17. nat/agent/tool_calling_agent/__init__.py +0 -0
  18. nat/agent/tool_calling_agent/agent.py +119 -0
  19. nat/agent/tool_calling_agent/register.py +106 -0
  20. nat/authentication/__init__.py +14 -0
  21. nat/authentication/api_key/__init__.py +14 -0
  22. nat/authentication/api_key/api_key_auth_provider.py +96 -0
  23. nat/authentication/api_key/api_key_auth_provider_config.py +124 -0
  24. nat/authentication/api_key/register.py +26 -0
  25. nat/authentication/exceptions/__init__.py +14 -0
  26. nat/authentication/exceptions/api_key_exceptions.py +38 -0
  27. nat/authentication/http_basic_auth/__init__.py +0 -0
  28. nat/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  29. nat/authentication/http_basic_auth/register.py +30 -0
  30. nat/authentication/interfaces.py +93 -0
  31. nat/authentication/oauth2/__init__.py +14 -0
  32. nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  33. nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  34. nat/authentication/oauth2/register.py +25 -0
  35. nat/authentication/register.py +21 -0
  36. nat/builder/__init__.py +0 -0
  37. nat/builder/builder.py +285 -0
  38. nat/builder/component_utils.py +316 -0
  39. nat/builder/context.py +270 -0
  40. nat/builder/embedder.py +24 -0
  41. nat/builder/eval_builder.py +161 -0
  42. nat/builder/evaluator.py +29 -0
  43. nat/builder/framework_enum.py +24 -0
  44. nat/builder/front_end.py +73 -0
  45. nat/builder/function.py +344 -0
  46. nat/builder/function_base.py +380 -0
  47. nat/builder/function_info.py +627 -0
  48. nat/builder/intermediate_step_manager.py +174 -0
  49. nat/builder/llm.py +25 -0
  50. nat/builder/retriever.py +25 -0
  51. nat/builder/user_interaction_manager.py +78 -0
  52. nat/builder/workflow.py +148 -0
  53. nat/builder/workflow_builder.py +1117 -0
  54. nat/cli/__init__.py +14 -0
  55. nat/cli/cli_utils/__init__.py +0 -0
  56. nat/cli/cli_utils/config_override.py +231 -0
  57. nat/cli/cli_utils/validation.py +37 -0
  58. nat/cli/commands/__init__.py +0 -0
  59. nat/cli/commands/configure/__init__.py +0 -0
  60. nat/cli/commands/configure/channel/__init__.py +0 -0
  61. nat/cli/commands/configure/channel/add.py +28 -0
  62. nat/cli/commands/configure/channel/channel.py +34 -0
  63. nat/cli/commands/configure/channel/remove.py +30 -0
  64. nat/cli/commands/configure/channel/update.py +30 -0
  65. nat/cli/commands/configure/configure.py +33 -0
  66. nat/cli/commands/evaluate.py +139 -0
  67. nat/cli/commands/info/__init__.py +14 -0
  68. nat/cli/commands/info/info.py +37 -0
  69. nat/cli/commands/info/list_channels.py +32 -0
  70. nat/cli/commands/info/list_components.py +129 -0
  71. nat/cli/commands/info/list_mcp.py +304 -0
  72. nat/cli/commands/registry/__init__.py +14 -0
  73. nat/cli/commands/registry/publish.py +88 -0
  74. nat/cli/commands/registry/pull.py +118 -0
  75. nat/cli/commands/registry/registry.py +36 -0
  76. nat/cli/commands/registry/remove.py +108 -0
  77. nat/cli/commands/registry/search.py +155 -0
  78. nat/cli/commands/sizing/__init__.py +14 -0
  79. nat/cli/commands/sizing/calc.py +297 -0
  80. nat/cli/commands/sizing/sizing.py +27 -0
  81. nat/cli/commands/start.py +246 -0
  82. nat/cli/commands/uninstall.py +81 -0
  83. nat/cli/commands/validate.py +47 -0
  84. nat/cli/commands/workflow/__init__.py +14 -0
  85. nat/cli/commands/workflow/templates/__init__.py.j2 +0 -0
  86. nat/cli/commands/workflow/templates/config.yml.j2 +16 -0
  87. nat/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
  88. nat/cli/commands/workflow/templates/register.py.j2 +5 -0
  89. nat/cli/commands/workflow/templates/workflow.py.j2 +36 -0
  90. nat/cli/commands/workflow/workflow.py +37 -0
  91. nat/cli/commands/workflow/workflow_commands.py +317 -0
  92. nat/cli/entrypoint.py +135 -0
  93. nat/cli/main.py +57 -0
  94. nat/cli/register_workflow.py +488 -0
  95. nat/cli/type_registry.py +1000 -0
  96. nat/data_models/__init__.py +14 -0
  97. nat/data_models/api_server.py +716 -0
  98. nat/data_models/authentication.py +231 -0
  99. nat/data_models/common.py +171 -0
  100. nat/data_models/component.py +58 -0
  101. nat/data_models/component_ref.py +168 -0
  102. nat/data_models/config.py +410 -0
  103. nat/data_models/dataset_handler.py +169 -0
  104. nat/data_models/discovery_metadata.py +305 -0
  105. nat/data_models/embedder.py +27 -0
  106. nat/data_models/evaluate.py +127 -0
  107. nat/data_models/evaluator.py +26 -0
  108. nat/data_models/front_end.py +26 -0
  109. nat/data_models/function.py +30 -0
  110. nat/data_models/function_dependencies.py +72 -0
  111. nat/data_models/interactive.py +246 -0
  112. nat/data_models/intermediate_step.py +302 -0
  113. nat/data_models/invocation_node.py +38 -0
  114. nat/data_models/llm.py +27 -0
  115. nat/data_models/logging.py +26 -0
  116. nat/data_models/memory.py +27 -0
  117. nat/data_models/object_store.py +44 -0
  118. nat/data_models/profiler.py +54 -0
  119. nat/data_models/registry_handler.py +26 -0
  120. nat/data_models/retriever.py +30 -0
  121. nat/data_models/retry_mixin.py +35 -0
  122. nat/data_models/span.py +190 -0
  123. nat/data_models/step_adaptor.py +64 -0
  124. nat/data_models/streaming.py +33 -0
  125. nat/data_models/swe_bench_model.py +54 -0
  126. nat/data_models/telemetry_exporter.py +26 -0
  127. nat/data_models/ttc_strategy.py +30 -0
  128. nat/embedder/__init__.py +0 -0
  129. nat/embedder/nim_embedder.py +59 -0
  130. nat/embedder/openai_embedder.py +43 -0
  131. nat/embedder/register.py +22 -0
  132. nat/eval/__init__.py +14 -0
  133. nat/eval/config.py +60 -0
  134. nat/eval/dataset_handler/__init__.py +0 -0
  135. nat/eval/dataset_handler/dataset_downloader.py +106 -0
  136. nat/eval/dataset_handler/dataset_filter.py +52 -0
  137. nat/eval/dataset_handler/dataset_handler.py +367 -0
  138. nat/eval/evaluate.py +510 -0
  139. nat/eval/evaluator/__init__.py +14 -0
  140. nat/eval/evaluator/base_evaluator.py +77 -0
  141. nat/eval/evaluator/evaluator_model.py +45 -0
  142. nat/eval/intermediate_step_adapter.py +99 -0
  143. nat/eval/rag_evaluator/__init__.py +0 -0
  144. nat/eval/rag_evaluator/evaluate.py +178 -0
  145. nat/eval/rag_evaluator/register.py +143 -0
  146. nat/eval/register.py +23 -0
  147. nat/eval/remote_workflow.py +133 -0
  148. nat/eval/runners/__init__.py +14 -0
  149. nat/eval/runners/config.py +39 -0
  150. nat/eval/runners/multi_eval_runner.py +54 -0
  151. nat/eval/runtime_event_subscriber.py +52 -0
  152. nat/eval/swe_bench_evaluator/__init__.py +0 -0
  153. nat/eval/swe_bench_evaluator/evaluate.py +215 -0
  154. nat/eval/swe_bench_evaluator/register.py +36 -0
  155. nat/eval/trajectory_evaluator/__init__.py +0 -0
  156. nat/eval/trajectory_evaluator/evaluate.py +75 -0
  157. nat/eval/trajectory_evaluator/register.py +40 -0
  158. nat/eval/tunable_rag_evaluator/__init__.py +0 -0
  159. nat/eval/tunable_rag_evaluator/evaluate.py +245 -0
  160. nat/eval/tunable_rag_evaluator/register.py +52 -0
  161. nat/eval/usage_stats.py +41 -0
  162. nat/eval/utils/__init__.py +0 -0
  163. nat/eval/utils/output_uploader.py +140 -0
  164. nat/eval/utils/tqdm_position_registry.py +40 -0
  165. nat/eval/utils/weave_eval.py +184 -0
  166. nat/experimental/__init__.py +0 -0
  167. nat/experimental/decorators/__init__.py +0 -0
  168. nat/experimental/decorators/experimental_warning_decorator.py +134 -0
  169. nat/experimental/test_time_compute/__init__.py +0 -0
  170. nat/experimental/test_time_compute/editing/__init__.py +0 -0
  171. nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
  172. nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
  173. nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
  174. nat/experimental/test_time_compute/functions/__init__.py +0 -0
  175. nat/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
  176. nat/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
  177. nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +205 -0
  178. nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +146 -0
  179. nat/experimental/test_time_compute/models/__init__.py +0 -0
  180. nat/experimental/test_time_compute/models/editor_config.py +132 -0
  181. nat/experimental/test_time_compute/models/scoring_config.py +112 -0
  182. nat/experimental/test_time_compute/models/search_config.py +120 -0
  183. nat/experimental/test_time_compute/models/selection_config.py +154 -0
  184. nat/experimental/test_time_compute/models/stage_enums.py +43 -0
  185. nat/experimental/test_time_compute/models/strategy_base.py +66 -0
  186. nat/experimental/test_time_compute/models/tool_use_config.py +41 -0
  187. nat/experimental/test_time_compute/models/ttc_item.py +48 -0
  188. nat/experimental/test_time_compute/register.py +36 -0
  189. nat/experimental/test_time_compute/scoring/__init__.py +0 -0
  190. nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
  191. nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
  192. nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
  193. nat/experimental/test_time_compute/search/__init__.py +0 -0
  194. nat/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
  195. nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
  196. nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
  197. nat/experimental/test_time_compute/selection/__init__.py +0 -0
  198. nat/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
  199. nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
  200. nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
  201. nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
  202. nat/experimental/test_time_compute/selection/threshold_selector.py +58 -0
  203. nat/front_ends/__init__.py +14 -0
  204. nat/front_ends/console/__init__.py +14 -0
  205. nat/front_ends/console/authentication_flow_handler.py +233 -0
  206. nat/front_ends/console/console_front_end_config.py +32 -0
  207. nat/front_ends/console/console_front_end_plugin.py +96 -0
  208. nat/front_ends/console/register.py +25 -0
  209. nat/front_ends/cron/__init__.py +14 -0
  210. nat/front_ends/fastapi/__init__.py +14 -0
  211. nat/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  212. nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  213. nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  214. nat/front_ends/fastapi/fastapi_front_end_config.py +241 -0
  215. nat/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  216. nat/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
  217. nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1087 -0
  218. nat/front_ends/fastapi/html_snippets/__init__.py +14 -0
  219. nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  220. nat/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
  221. nat/front_ends/fastapi/job_store.py +183 -0
  222. nat/front_ends/fastapi/main.py +72 -0
  223. nat/front_ends/fastapi/message_handler.py +320 -0
  224. nat/front_ends/fastapi/message_validator.py +352 -0
  225. nat/front_ends/fastapi/register.py +25 -0
  226. nat/front_ends/fastapi/response_helpers.py +195 -0
  227. nat/front_ends/fastapi/step_adaptor.py +319 -0
  228. nat/front_ends/mcp/__init__.py +14 -0
  229. nat/front_ends/mcp/mcp_front_end_config.py +36 -0
  230. nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
  231. nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
  232. nat/front_ends/mcp/register.py +27 -0
  233. nat/front_ends/mcp/tool_converter.py +241 -0
  234. nat/front_ends/register.py +22 -0
  235. nat/front_ends/simple_base/__init__.py +14 -0
  236. nat/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
  237. nat/llm/__init__.py +0 -0
  238. nat/llm/aws_bedrock_llm.py +57 -0
  239. nat/llm/nim_llm.py +46 -0
  240. nat/llm/openai_llm.py +46 -0
  241. nat/llm/register.py +23 -0
  242. nat/llm/utils/__init__.py +14 -0
  243. nat/llm/utils/env_config_value.py +94 -0
  244. nat/llm/utils/error.py +17 -0
  245. nat/memory/__init__.py +20 -0
  246. nat/memory/interfaces.py +183 -0
  247. nat/memory/models.py +112 -0
  248. nat/meta/pypi.md +58 -0
  249. nat/object_store/__init__.py +20 -0
  250. nat/object_store/in_memory_object_store.py +76 -0
  251. nat/object_store/interfaces.py +84 -0
  252. nat/object_store/models.py +38 -0
  253. nat/object_store/register.py +20 -0
  254. nat/observability/__init__.py +14 -0
  255. nat/observability/exporter/__init__.py +14 -0
  256. nat/observability/exporter/base_exporter.py +449 -0
  257. nat/observability/exporter/exporter.py +78 -0
  258. nat/observability/exporter/file_exporter.py +33 -0
  259. nat/observability/exporter/processing_exporter.py +322 -0
  260. nat/observability/exporter/raw_exporter.py +52 -0
  261. nat/observability/exporter/span_exporter.py +288 -0
  262. nat/observability/exporter_manager.py +335 -0
  263. nat/observability/mixin/__init__.py +14 -0
  264. nat/observability/mixin/batch_config_mixin.py +26 -0
  265. nat/observability/mixin/collector_config_mixin.py +23 -0
  266. nat/observability/mixin/file_mixin.py +288 -0
  267. nat/observability/mixin/file_mode.py +23 -0
  268. nat/observability/mixin/resource_conflict_mixin.py +134 -0
  269. nat/observability/mixin/serialize_mixin.py +61 -0
  270. nat/observability/mixin/type_introspection_mixin.py +183 -0
  271. nat/observability/processor/__init__.py +14 -0
  272. nat/observability/processor/batching_processor.py +310 -0
  273. nat/observability/processor/callback_processor.py +42 -0
  274. nat/observability/processor/intermediate_step_serializer.py +28 -0
  275. nat/observability/processor/processor.py +71 -0
  276. nat/observability/register.py +96 -0
  277. nat/observability/utils/__init__.py +14 -0
  278. nat/observability/utils/dict_utils.py +236 -0
  279. nat/observability/utils/time_utils.py +31 -0
  280. nat/plugins/.namespace +1 -0
  281. nat/profiler/__init__.py +0 -0
  282. nat/profiler/calc/__init__.py +14 -0
  283. nat/profiler/calc/calc_runner.py +627 -0
  284. nat/profiler/calc/calculations.py +288 -0
  285. nat/profiler/calc/data_models.py +188 -0
  286. nat/profiler/calc/plot.py +345 -0
  287. nat/profiler/callbacks/__init__.py +0 -0
  288. nat/profiler/callbacks/agno_callback_handler.py +295 -0
  289. nat/profiler/callbacks/base_callback_class.py +20 -0
  290. nat/profiler/callbacks/langchain_callback_handler.py +290 -0
  291. nat/profiler/callbacks/llama_index_callback_handler.py +205 -0
  292. nat/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
  293. nat/profiler/callbacks/token_usage_base_model.py +27 -0
  294. nat/profiler/data_frame_row.py +51 -0
  295. nat/profiler/data_models.py +24 -0
  296. nat/profiler/decorators/__init__.py +0 -0
  297. nat/profiler/decorators/framework_wrapper.py +131 -0
  298. nat/profiler/decorators/function_tracking.py +254 -0
  299. nat/profiler/forecasting/__init__.py +0 -0
  300. nat/profiler/forecasting/config.py +18 -0
  301. nat/profiler/forecasting/model_trainer.py +75 -0
  302. nat/profiler/forecasting/models/__init__.py +22 -0
  303. nat/profiler/forecasting/models/forecasting_base_model.py +40 -0
  304. nat/profiler/forecasting/models/linear_model.py +197 -0
  305. nat/profiler/forecasting/models/random_forest_regressor.py +269 -0
  306. nat/profiler/inference_metrics_model.py +28 -0
  307. nat/profiler/inference_optimization/__init__.py +0 -0
  308. nat/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
  309. nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
  310. nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
  311. nat/profiler/inference_optimization/data_models.py +386 -0
  312. nat/profiler/inference_optimization/experimental/__init__.py +0 -0
  313. nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
  314. nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
  315. nat/profiler/inference_optimization/llm_metrics.py +212 -0
  316. nat/profiler/inference_optimization/prompt_caching.py +163 -0
  317. nat/profiler/inference_optimization/token_uniqueness.py +107 -0
  318. nat/profiler/inference_optimization/workflow_runtimes.py +72 -0
  319. nat/profiler/intermediate_property_adapter.py +102 -0
  320. nat/profiler/profile_runner.py +473 -0
  321. nat/profiler/utils.py +184 -0
  322. nat/registry_handlers/__init__.py +0 -0
  323. nat/registry_handlers/local/__init__.py +0 -0
  324. nat/registry_handlers/local/local_handler.py +176 -0
  325. nat/registry_handlers/local/register_local.py +37 -0
  326. nat/registry_handlers/metadata_factory.py +60 -0
  327. nat/registry_handlers/package_utils.py +571 -0
  328. nat/registry_handlers/pypi/__init__.py +0 -0
  329. nat/registry_handlers/pypi/pypi_handler.py +251 -0
  330. nat/registry_handlers/pypi/register_pypi.py +40 -0
  331. nat/registry_handlers/register.py +21 -0
  332. nat/registry_handlers/registry_handler_base.py +157 -0
  333. nat/registry_handlers/rest/__init__.py +0 -0
  334. nat/registry_handlers/rest/register_rest.py +56 -0
  335. nat/registry_handlers/rest/rest_handler.py +237 -0
  336. nat/registry_handlers/schemas/__init__.py +0 -0
  337. nat/registry_handlers/schemas/headers.py +42 -0
  338. nat/registry_handlers/schemas/package.py +68 -0
  339. nat/registry_handlers/schemas/publish.py +68 -0
  340. nat/registry_handlers/schemas/pull.py +82 -0
  341. nat/registry_handlers/schemas/remove.py +36 -0
  342. nat/registry_handlers/schemas/search.py +91 -0
  343. nat/registry_handlers/schemas/status.py +47 -0
  344. nat/retriever/__init__.py +0 -0
  345. nat/retriever/interface.py +41 -0
  346. nat/retriever/milvus/__init__.py +14 -0
  347. nat/retriever/milvus/register.py +81 -0
  348. nat/retriever/milvus/retriever.py +228 -0
  349. nat/retriever/models.py +77 -0
  350. nat/retriever/nemo_retriever/__init__.py +14 -0
  351. nat/retriever/nemo_retriever/register.py +60 -0
  352. nat/retriever/nemo_retriever/retriever.py +190 -0
  353. nat/retriever/register.py +22 -0
  354. nat/runtime/__init__.py +14 -0
  355. nat/runtime/loader.py +220 -0
  356. nat/runtime/runner.py +195 -0
  357. nat/runtime/session.py +162 -0
  358. nat/runtime/user_metadata.py +130 -0
  359. nat/settings/__init__.py +0 -0
  360. nat/settings/global_settings.py +318 -0
  361. nat/test/.namespace +1 -0
  362. nat/tool/__init__.py +0 -0
  363. nat/tool/chat_completion.py +74 -0
  364. nat/tool/code_execution/README.md +151 -0
  365. nat/tool/code_execution/__init__.py +0 -0
  366. nat/tool/code_execution/code_sandbox.py +267 -0
  367. nat/tool/code_execution/local_sandbox/.gitignore +1 -0
  368. nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
  369. nat/tool/code_execution/local_sandbox/__init__.py +13 -0
  370. nat/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
  371. nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
  372. nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
  373. nat/tool/code_execution/register.py +74 -0
  374. nat/tool/code_execution/test_code_execution_sandbox.py +414 -0
  375. nat/tool/code_execution/utils.py +100 -0
  376. nat/tool/datetime_tools.py +42 -0
  377. nat/tool/document_search.py +141 -0
  378. nat/tool/github_tools/__init__.py +0 -0
  379. nat/tool/github_tools/create_github_commit.py +133 -0
  380. nat/tool/github_tools/create_github_issue.py +87 -0
  381. nat/tool/github_tools/create_github_pr.py +106 -0
  382. nat/tool/github_tools/get_github_file.py +106 -0
  383. nat/tool/github_tools/get_github_issue.py +166 -0
  384. nat/tool/github_tools/get_github_pr.py +256 -0
  385. nat/tool/github_tools/update_github_issue.py +100 -0
  386. nat/tool/mcp/__init__.py +14 -0
  387. nat/tool/mcp/exceptions.py +142 -0
  388. nat/tool/mcp/mcp_client.py +255 -0
  389. nat/tool/mcp/mcp_tool.py +96 -0
  390. nat/tool/memory_tools/__init__.py +0 -0
  391. nat/tool/memory_tools/add_memory_tool.py +79 -0
  392. nat/tool/memory_tools/delete_memory_tool.py +67 -0
  393. nat/tool/memory_tools/get_memory_tool.py +72 -0
  394. nat/tool/nvidia_rag.py +95 -0
  395. nat/tool/register.py +38 -0
  396. nat/tool/retriever.py +94 -0
  397. nat/tool/server_tools.py +66 -0
  398. nat/utils/__init__.py +0 -0
  399. nat/utils/data_models/__init__.py +0 -0
  400. nat/utils/data_models/schema_validator.py +58 -0
  401. nat/utils/debugging_utils.py +43 -0
  402. nat/utils/dump_distro_mapping.py +32 -0
  403. nat/utils/exception_handlers/__init__.py +0 -0
  404. nat/utils/exception_handlers/automatic_retries.py +289 -0
  405. nat/utils/exception_handlers/mcp.py +211 -0
  406. nat/utils/exception_handlers/schemas.py +114 -0
  407. nat/utils/io/__init__.py +0 -0
  408. nat/utils/io/model_processing.py +28 -0
  409. nat/utils/io/yaml_tools.py +119 -0
  410. nat/utils/log_utils.py +37 -0
  411. nat/utils/metadata_utils.py +74 -0
  412. nat/utils/optional_imports.py +142 -0
  413. nat/utils/producer_consumer_queue.py +178 -0
  414. nat/utils/reactive/__init__.py +0 -0
  415. nat/utils/reactive/base/__init__.py +0 -0
  416. nat/utils/reactive/base/observable_base.py +65 -0
  417. nat/utils/reactive/base/observer_base.py +55 -0
  418. nat/utils/reactive/base/subject_base.py +79 -0
  419. nat/utils/reactive/observable.py +59 -0
  420. nat/utils/reactive/observer.py +76 -0
  421. nat/utils/reactive/subject.py +131 -0
  422. nat/utils/reactive/subscription.py +49 -0
  423. nat/utils/settings/__init__.py +0 -0
  424. nat/utils/settings/global_settings.py +197 -0
  425. nat/utils/string_utils.py +38 -0
  426. nat/utils/type_converter.py +290 -0
  427. nat/utils/type_utils.py +484 -0
  428. nat/utils/url_utils.py +27 -0
  429. nvidia_nat-1.2.0.dist-info/METADATA +365 -0
  430. nvidia_nat-1.2.0.dist-info/RECORD +435 -0
  431. nvidia_nat-1.2.0.dist-info/WHEEL +5 -0
  432. nvidia_nat-1.2.0.dist-info/entry_points.txt +21 -0
  433. nvidia_nat-1.2.0.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
  434. nvidia_nat-1.2.0.dist-info/licenses/LICENSE.md +201 -0
  435. nvidia_nat-1.2.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,405 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ An advanced script that:
17
+
18
+ 1. Builds chronological call sequences (LLM or TOOL) from a DataFrame of events.
19
+ 2. Incorporates llm_text_input for LLM calls into the token used by PrefixSpan.
20
+ 3. Runs PrefixSpan to discover frequent sub-sequences (patterns) across examples.
21
+ 4. Computes coverage (fraction of examples containing each pattern) and average sub-sequence duration.
22
+ 5. Returns a Pydantic model with the top patterns plus a textual report.
23
+
24
+ Main use case:
25
+
26
+ - Identify recurring sequences of calls + repeated LLM text inputs, which can help with caching or further optimization
27
+ (deduplicate repeated calls or pre-load certain tokens).
28
+ """
29
+
30
+ import logging
31
+
32
+ import numpy as np
33
+ import pandas as pd
34
+
35
+ from nat.data_models.intermediate_step import IntermediateStep
36
+ from nat.profiler.inference_optimization.data_models import FrequentPattern
37
+ from nat.profiler.inference_optimization.data_models import PrefixCallNode
38
+ from nat.profiler.inference_optimization.data_models import PrefixSpanSubworkflowResult
39
+ from nat.profiler.utils import create_standardized_dataframe
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+ # --------------------------------------------------------------------------------
44
+ # 1) Building Sequences (Including llm_text_input)
45
+ # --------------------------------------------------------------------------------
46
+
47
+
48
+ def parse_op_type(evt: str) -> str | None:
49
+ """Map event_type => 'LLM' or 'TOOL' if it starts with those prefixes."""
50
+ et = evt.upper()
51
+ if et.startswith("LLM_"):
52
+ return "LLM"
53
+ if et.startswith("TOOL_"):
54
+ return "TOOL"
55
+ return None
56
+
57
+
58
def get_op_name(row: pd.Series, op_type: str) -> str:
    """Resolve an operation name from either ``llm_name`` or ``tool_name`` depending on *op_type*."""
    if op_type == "LLM":
        name = row.get("llm_name")
        return name if name else "unknown_llm"
    if op_type == "TOOL":
        name = row.get("tool_name")
        return name if name else "unknown_tool"
    return "unknown_op"
65
+
66
+
67
def build_call_sequence_for_example(example_df: pd.DataFrame) -> list[PrefixCallNode]:
    """
    For a single example's events, pair START/END calls and build a chronological list of PrefixCallNodes,
    storing llm_text_input if op_type=LLM and it's available at START or END.

    Args:
        example_df: Events for one example. Must contain the columns
            ``event_timestamp``, ``example_number``, ``event_type`` (an object whose
            ``.value`` is the event-type string, e.g. "LLM_START"), and ``UUID``;
            ``llm_name``/``tool_name``/``llm_text_input`` are read when present.

    Returns:
        Completed (START paired with END) calls sorted by start time. START events
        that never receive a matching END are silently dropped.
    """
    example_df = example_df.sort_values("event_timestamp")
    example_num = int(example_df["example_number"].iloc[0])

    # In-flight calls keyed by event UUID, waiting for their matching *_END event.
    partial_map: dict[str, dict] = {}
    calls_list: list[PrefixCallNode] = []

    for _, row in example_df.iterrows():
        # event_type is enum-like; .value yields the raw string such as "TOOL_END".
        evt_type = row["event_type"].value.upper()
        uuid = str(row["UUID"])
        ts = float(row["event_timestamp"])

        op_type = parse_op_type(evt_type)
        if not op_type:
            # ignore events that are not LLM_/TOOL_
            continue

        if evt_type.endswith("_START"):
            op_name = get_op_name(row, op_type)
            call_info = {
                "uuid": uuid,
                "example_number": example_num,
                "operation_type": op_type,
                "operation_name": op_name,
                "start_time": ts,
                "llm_text_input": None
            }
            # If llm_text_input is present in START
            if op_type == "LLM" and "llm_text_input" in row and pd.notna(row["llm_text_input"]):
                call_info["llm_text_input"] = str(row["llm_text_input"])
            partial_map[uuid] = call_info

        elif evt_type.endswith("_END"):
            if uuid in partial_map:
                # finalize
                start_info = partial_map[uuid]
                end_time = ts
                # Clamp to zero in case END is timestamped before START (clock skew).
                duration = max(0.0, end_time - start_info["start_time"])
                # If we only have llm_text_input at END, override if not present
                # NOTE(review): this overwrites any START-side text with the END-side
                # text when both exist — confirm that is intended.
                if op_type == "LLM" and "llm_text_input" in row and pd.notna(row["llm_text_input"]):
                    start_info["llm_text_input"] = str(row["llm_text_input"])

                node = PrefixCallNode(uuid=uuid,
                                      example_number=example_num,
                                      operation_type=start_info["operation_type"],
                                      operation_name=start_info["operation_name"],
                                      start_time=start_info["start_time"],
                                      end_time=end_time,
                                      duration=duration,
                                      llm_text_input=start_info["llm_text_input"])
                calls_list.append(node)
                del partial_map[uuid]

    # Sort final calls by start_time
    calls_list.sort(key=lambda c: c.start_time)
    return calls_list
128
+
129
+
130
def build_sequences(df: pd.DataFrame) -> dict[int, list[PrefixCallNode]]:
    """
    Build one chronological ``PrefixCallNode`` list per ``example_number`` in *df*.

    Events are sorted by example and timestamp before pairing, so each value in
    the returned mapping is ordered by call start time and includes the LLM text
    input when present.
    """
    ordered = df.copy()
    ordered.sort_values(["example_number", "event_timestamp"], inplace=True)
    return {
        example_id: build_call_sequence_for_example(events)
        for example_id, events in ordered.groupby("example_number")
    }
143
+
144
+
145
+ # --------------------------------------------------------------------------------
146
+ # 2) Token Construction & PrefixSpan
147
+ # --------------------------------------------------------------------------------
148
+
149
+
150
+ def build_token(call: PrefixCallNode, max_text_len: int = 20, prefix_list: list[str] = None) -> str:
151
+ """
152
+ Construct a token for prefixspan from a PrefixCallNode.
153
+ - We do "LLM:{operation_name}|{text}" if it's an LLM call and text is available
154
+ - We optionally truncate or hash the text for length. Here we just do naive truncation
155
+ - For a tool call, we do "TOOL:{operation_name}"
156
+ """
157
+ if call.operation_type == "LLM":
158
+ text_part = ""
159
+ if call.llm_text_input:
160
+ # naive truncation
161
+ truncated = call.llm_text_input
162
+
163
+ # Check truncated text for an exact match of any string in prefix_list
164
+ # Does not have to be in just the prefix, but anywhere
165
+ # Replaces the matched string with <common_prefix>
166
+ if prefix_list:
167
+ for prefix in prefix_list:
168
+ for i in range(len(prefix), 0, -1):
169
+ if truncated.startswith(prefix[:i]):
170
+ truncated = truncated.replace(prefix[:i], "<common_prefix>")
171
+ break
172
+
173
+ truncated = truncated[:max_text_len].replace("\n", " ")
174
+ text_part = f"|{truncated}"
175
+ return f"LLM:{call.operation_name}{text_part}"
176
+
177
+ return f"TOOL:{call.operation_name}"
178
+
179
+
180
def convert_sequences_for_prefixspan(sequences_map: dict[int, list["PrefixCallNode"]],
                                     max_text_len: int = 20,
                                     prefix_list: list[str] | None = None) -> list[list[str]]:
    """
    Convert each example's list of PrefixCallNode into a list of tokens. Return a list-of-lists
    suitable for prefixspan. E.g.::

        [
            ["LLM:llama-3|Hello", "TOOL:internet-search", "LLM:llama-3|How are you?"],
            ["LLM:davinci|some prompt", "TOOL:vector-db"]
            ...
        ]

    Args:
        sequences_map: Mapping of example number to its chronological call nodes.
        max_text_len: Maximum LLM text length kept in each token.
        prefix_list: Optional common prefixes collapsed during tokenization.

    Returns:
        One token list per example, in the mapping's iteration order.
    """
    return [[build_token(call, max_text_len, prefix_list) for call in calls]
            for calls in sequences_map.values()]
199
+
200
+
201
def run_prefixspan(sequences_map: dict[int, list["PrefixCallNode"]],
                   min_support: int | float,
                   max_text_len: int = 20,
                   prefix_list: list[str] | None = None) -> list[tuple[list[str], int]]:
    """
    Mine frequent call-sequence patterns with PrefixSpan.

    1) Convert all example sequences => tokens
    2) Run prefixspan with min_support
    3) Return (pattern, freq) list

    Args:
        sequences_map: Mapping of example number to its chronological call nodes.
        min_support: Absolute occurrence count (int), or fraction of sequences (float)
            that is converted to an absolute count (at least 1).
        max_text_len: Maximum LLM text length kept in each token.
        prefix_list: Optional common prefixes collapsed during tokenization.

    Returns:
        A list of ``(token_pattern, support_count)`` tuples.

    Raises:
        ImportError: If the optional ``prefixspan`` dependency is not installed.
    """
    try:
        from prefixspan import PrefixSpan
    except ImportError:
        logger.error("prefixspan is not installed. Please install prefixspan to run the prefix analysis in the "
                     "profiler or install `nvidia-nat[profiler]` to install all necessary profiling packages.")
        raise

    token_seqs = convert_sequences_for_prefixspan(sequences_map, max_text_len, prefix_list)
    ps = PrefixSpan(token_seqs)

    # prefixspan interprets min_support as an absolute occurrence count, so a
    # fractional support is scaled by the number of sequences (minimum 1).
    if isinstance(min_support, float):
        abs_min_support = max(1, int(round(min_support * len(token_seqs))))
    else:
        abs_min_support = min_support

    # ps.frequent returns [(count, [item1, item2, ...])]; flip to (pattern, count).
    freq_patterns = ps.frequent(abs_min_support)  # pylint: disable=not-callable
    return [(pattern, count) for count, pattern in freq_patterns]
238
+
239
+
240
+ # --------------------------------------------------------------------------------
241
+ # 3) Coverage & Duration Computation
242
+ # --------------------------------------------------------------------------------
243
+
244
+
245
def find_contiguous_matches(pattern: list[str], seq: list[str]) -> list[tuple[int, int]]:
    """
    Scan 'seq' and collect every contiguous occurrence of 'pattern'.
    e.g. pattern=["LLM:llama-3|Hello", "TOOL:internet-search"], seq=...
    Return list of (start_idx, end_idx), both inclusive.
    """
    window = len(pattern)
    # Slide a window of len(pattern) across seq and keep every exact match.
    return [(idx, idx + window - 1)
            for idx in range(len(seq) - window + 1)
            if seq[idx:idx + window] == pattern]
258
+
259
+
260
def compute_coverage_and_duration(sequences_map: dict[int, list[PrefixCallNode]],
                                  prefixspan_patterns: list[tuple[list[str], int]],
                                  top_k: int,
                                  min_coverage: float = 0.0,
                                  max_text_len: int = 20,
                                  prefix_list: list[str] | None = None) -> list[FrequentPattern]:
    """
    For each pattern from prefixspan, compute:

    - coverage: fraction of examples that contain it
    - average_duration: sum of durations of calls in sub-sequence / total occurrences

    Then filter by min_coverage and pick top_k, sorted by frequency, coverage, avg_duration desc.

    :param sequences_map: mapping of example number -> ordered list of calls
    :param prefixspan_patterns: (pattern, frequency) tuples from ``run_prefixspan``
    :param top_k: how many patterns to keep
    :param min_coverage: discard patterns appearing in fewer than this fraction of examples
    :param max_text_len: how many chars of llm_text_input to incorporate per token
    :param prefix_list: the same prefix list that was passed to ``run_prefixspan``.
        Tokens are rebuilt here for matching, so the tokenization must match the
        one used during mining — otherwise patterns containing
        ``<common_prefix>`` can never match. Defaults to ``None`` for backward
        compatibility.
    """
    # Rebuild token sequences with the SAME tokenization used during mining so
    # that contiguous matching lines up with the mined patterns.
    token_sequences = {}
    call_sequences = {}
    for ex_num, call_list in sequences_map.items():
        token_seq = [build_token(c, max_text_len, prefix_list) for c in call_list]
        token_sequences[ex_num] = token_seq
        call_sequences[ex_num] = call_list

    total_examples = len(token_sequences)
    results: list[FrequentPattern] = []

    for (pat, freq) in prefixspan_patterns:
        # coverage => how many distinct example_num have at least one contiguous match
        examples_with_pattern = []
        total_occ = 0
        total_dur = 0.0

        for ex_num, token_seq in token_sequences.items():
            matches = find_contiguous_matches(pat, token_seq)
            if matches:
                examples_with_pattern.append(ex_num)
                # sum durations for each occurrence
                calls = call_sequences[ex_num]
                for (start_idx, end_idx) in matches:
                    dur_sum = float(np.sum([calls[i].duration for i in range(start_idx, end_idx + 1)]))
                    total_dur += dur_sum
                    total_occ += 1

        coverage_val = len(examples_with_pattern) / total_examples if total_examples > 0 else 0.0
        if coverage_val < min_coverage:
            continue

        avg_dur = total_dur / total_occ if total_occ > 0 else 0.0

        fp = FrequentPattern(pattern=pat,
                             frequency=freq,
                             coverage=coverage_val,
                             average_duration=avg_dur,
                             examples_containing=sorted(examples_with_pattern))
        results.append(fp)

    # sort & top_k: highest frequency first, ties broken by coverage then avg duration
    results.sort(key=lambda p: (p.frequency, p.coverage, p.average_duration), reverse=True)
    return results[:top_k]
317
+
318
+
319
+ # --------------------------------------------------------------------------------
320
+ # 4) Main Entry Function
321
+ # --------------------------------------------------------------------------------
322
+
323
+
324
def prefixspan_subworkflow_with_text(  # pylint: disable=too-many-positional-arguments
        all_steps: list[list[IntermediateStep]],
        min_support: int | float = 2,
        top_k: int = 10,
        min_coverage: float = 0.0,
        max_text_len: int = 700,
        prefix_list: list[str] | None = None) -> PrefixSpanSubworkflowResult:
    """
    1) Build sequences of calls for each example (with llm_text_input).
    2) Convert to token lists, run PrefixSpan with min_support.
    3) Compute coverage & average duration for each pattern, filter by min_coverage, pick top_k.
    4) Return Pydantic model with final patterns & textual report.

    :param all_steps: Intermediate steps
    :param min_support: minimal # of times (int) or fraction (float) for prefixspan
    :param top_k: how many patterns to keep
    :param min_coverage: discard patterns that appear in fewer than this fraction of examples
    :param max_text_len: how many chars of llm_text_input to incorporate in the token
    :param prefix_list: list of prefixes to filter on and exclude from pattern matching
    :raises ValueError: if the standardized DataFrame is missing required columns
    """
    df = create_standardized_dataframe(all_steps)
    # Validate columns before any processing so failures are immediate and explicit.
    required_cols = {
        "framework",
        "tool_name",
        "llm_name",
        "llm_text_input",
        "llm_text_output",
        "event_timestamp",
        "event_type",
        "UUID",
        "example_number",
        "prompt_tokens",
        "completion_tokens",
        "total_tokens"
    }
    missing = required_cols - set(df.columns)
    if missing:
        raise ValueError(f"DataFrame missing required columns: {missing}")

    # 1) Build sequences
    sequences_map = build_sequences(df)
    total_examples = len(sequences_map)

    # 2) prefixspan
    prefixspan_patterns = run_prefixspan(sequences_map,
                                         min_support=min_support,
                                         max_text_len=max_text_len,
                                         prefix_list=prefix_list)
    if not prefixspan_patterns:
        return PrefixSpanSubworkflowResult(
            patterns=[], textual_report="No frequent patterns found by PrefixSpan with the given min_support.")

    # 3) coverage & duration
    # NOTE(review): coverage matching rebuilds tokens without prefix_list, so
    # mined patterns that contain <common_prefix> will never match here —
    # confirm whether compute_coverage_and_duration should receive prefix_list.
    final_patterns = compute_coverage_and_duration(sequences_map,
                                                   prefixspan_patterns,
                                                   top_k=top_k,
                                                   min_coverage=min_coverage,
                                                   max_text_len=max_text_len)
    if not final_patterns:
        return PrefixSpanSubworkflowResult(patterns=[],
                                           textual_report="No patterns passed coverage/duration thresholds.")

    # 4) Build a human-readable report summarizing the surviving patterns.
    lines = []
    lines.append("=== PrefixSpan Sub-Workflow Mining w/ LLM Text ===")
    lines.append(f"Total examples: {total_examples}")
    lines.append(f"min_support={min_support}, top_k={top_k}, min_coverage={min_coverage}, max_text_len={max_text_len}")
    lines.append(f"Patterns discovered: {len(final_patterns)}")

    for i, pat in enumerate(final_patterns, start=1):
        chain_str = " -> ".join(pat.pattern)
        lines.append(f"\n{i}) Pattern: {chain_str}")
        lines.append(f"   Frequency: {pat.frequency}")
        lines.append(f"   Coverage: {pat.coverage:.2f} (appears in {len(pat.examples_containing)} examples)")
        lines.append(f"   Avg Duration: {pat.average_duration:.2f} seconds")
        lines.append(f"   Examples containing: {pat.examples_containing}")

    report_text = "\n".join(lines)

    # 5) Return final model
    return PrefixSpanSubworkflowResult(patterns=final_patterns, textual_report=report_text)
@@ -0,0 +1,212 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+
19
+ from nat.data_models.intermediate_step import IntermediateStep
20
+ from nat.profiler.utils import create_standardized_dataframe
21
+
22
+
23
class LLMMetrics:
    """
    A utility class for computing derived metrics on standardized LLM call logs.
    """

    @staticmethod
    def compute_profiling_metrics(all_steps: list[list[IntermediateStep]]) -> pd.DataFrame:
        """
        Compute and append the following columns to the provided DataFrame:

        1. NOVA-Event-ID (str):

           - The name of the calling function (`function_name`).

        2. NOVA-Requests-Remaining-In-Event (int):

           - For each row, how many future LLM_START events will occur (strictly after
             this row's event_timestamp) in the same (example_number, function_name).

        3. NOVA-Time-To-Next-Event (float):

           - For each row, the number of milliseconds until the next LLM_START event in
             the same (example_number, function_name). If no future event, set to -1.

        4. NOVA-Time-To-Event-End (float):

           - For each row, the number of milliseconds until the last future LLM_START
             event in the same (example_number, function_name). If no future event, set to -1.

        5. NOVA-Predicted-OSL (float or int):

           - For rows where event_type == 'LLM_START', this column will hold the
             `completion_tokens` of the corresponding LLM_END (matched by UUID). If no match,
             set to NaN (or another sentinel).

        6. NOVA-Time-To-Session-End (float):

           - For each row, the total milliseconds remaining in the workflow invocation,
             i.e. until the max event_timestamp within that example_number.

        Assumptions:

        - event_timestamp is an epoch timestamp in *seconds*.
        - Columns required in the input df (at minimum)::

            ['example_number', 'event_timestamp', 'event_type', 'function_name', 'UUID', 'completion_tokens']

        - 'LLM_START' / 'LLM_END' events share the same UUID.
        - The DataFrame may have additional columns such as 'llm_text_input', 'llm_text_output',
          'function_id', 'parent_function_name', 'parent_function_id', etc.

        :param all_steps: All intermediate steps for each example.
        :return: The same DataFrame with the six NOVA- columns appended.
        """

        df = create_standardized_dataframe(all_steps)

        # Nothing to annotate on an empty frame; return it unchanged.
        if df.empty:
            return df

        # ---------------------------------------------------------------------
        # 1. NOVA-Event-ID
        # This is simply the function_name.
        # ---------------------------------------------------------------------
        df['NOVA-Event-ID'] = df['function_name']

        # ---------------------------------------------------------------------
        # 2. NOVA-Requests-Remaining-In-Event,
        # 3. NOVA-Time-To-Next-Event,
        # 4. NOVA-Time-To-Event-End
        #
        # We'll compute these by grouping on (example_number, function_name),
        # sorting by event_timestamp, and for each row calculating:
        #
        # - how many LLM_START events lie strictly in the future,
        # - the time to the next LLM_START event in the future,
        # - the time to the last LLM_START event in the future.
        #
        # For times, we convert to milliseconds by multiplying by 1000,
        # assuming event_timestamp is in seconds.
        # ---------------------------------------------------------------------

        # Initialize columns with default values (-1 acts as the "no future
        # LLM_START event" sentinel, per the docstring).
        df['NOVA-Requests-Remaining-In-Event'] = -1
        df['NOVA-Time-To-Next-Event'] = -1.0
        df['NOVA-Time-To-Event-End'] = -1.0

        def _compute_group_metrics(subdf: pd.DataFrame) -> pd.DataFrame:
            """
            For a sub-DataFrame with a unique (example_number, function_name),
            compute the requested columns for each row.
            """
            # Sort by time to ensure chronological order.
            subdf = subdf.sort_values('event_timestamp').copy()

            # Collect all LLM_START timestamps in this group as a sorted array
            # (sorted because subdf was just sorted; searchsorted below relies on this).
            llm_start_mask = (subdf['event_type'] == 'LLM_START')
            llm_start_ts = subdf.loc[llm_start_mask, 'event_timestamp'].values

            # If no LLM_START events present, we can return immediately.
            if len(llm_start_ts) == 0:
                # No future LLM_START events to compute; everything stays default -1.
                return subdf

            def _rowwise_calc(row):
                """
                For each row, compute:
                - how many LLM_START events lie strictly in the future,
                - time to the next LLM_START event,
                - time to the last LLM_START event (in the future).
                """
                row_ts = row['event_timestamp']

                # Use searchsorted to find how many LLM_START events lie after this row's timestamp.
                # side='right' means we treat any LLM_START at exactly row_ts as not 'in the future'.
                insertion_idx = np.searchsorted(llm_start_ts, row_ts, side='right')

                # (A) Requests remaining = how many LLM_START events are strictly after row_ts
                requests_remaining = len(llm_start_ts) - insertion_idx

                # (B) Time to next LLM_START (if any); convert seconds -> ms.
                if insertion_idx < len(llm_start_ts):
                    next_event_time = llm_start_ts[insertion_idx]
                    time_to_next_event = (next_event_time - row_ts) * 1000.0
                else:
                    time_to_next_event = -1.0

                # (C) Time to the last LLM_START in the future (if any).
                # The last LLM_START in the future is simply the last entry of llm_start_ts
                # if there's at least one future LLM_START. We'll check that it is strictly > row_ts.
                if requests_remaining > 0:
                    last_future_llm_start = llm_start_ts[-1]
                    # double-check that it's truly in the future
                    if last_future_llm_start > row_ts:
                        time_to_event_end = (last_future_llm_start - row_ts) * 1000.0
                    else:
                        time_to_event_end = -1.0
                else:
                    time_to_event_end = -1.0

                return pd.Series({
                    'NOVA-Requests-Remaining-In-Event': requests_remaining,
                    'NOVA-Time-To-Next-Event': time_to_next_event,
                    'NOVA-Time-To-Event-End': time_to_event_end
                })

            # Apply row-wise calculations (one Series of the three metrics per row).
            metrics_df = subdf.apply(_rowwise_calc, axis=1)

            # Merge back into subdf (aligned on the row index).
            subdf[['NOVA-Requests-Remaining-In-Event', 'NOVA-Time-To-Next-Event',
                   'NOVA-Time-To-Event-End']] = metrics_df

            return subdf

        # Apply the group metrics. group_keys=False keeps the original index, and
        # sort_index() restores the original row order after the per-group sort_values.
        df_group = df.groupby(['example_number', 'function_name'], group_keys=False)
        df = df_group[df.columns].apply(_compute_group_metrics).sort_index()

        # ---------------------------------------------------------------------
        # 5. NOVA-Predicted-OSL
        #
        # For each LLM_START event, we want the completion_tokens from its
        # corresponding LLM_END event. Both share the same UUID.
        # We'll do a map from UUID -> completion_tokens for LLM_END rows.
        # ---------------------------------------------------------------------
        df['NOVA-Predicted-OSL'] = np.nan

        # Build a map of UUID -> completion_tokens from LLM_END rows
        # (rows without a UUID are dropped before building the map).
        llm_end_map = (df.loc[df['event_type'] == 'LLM_END', ['UUID', 'completion_tokens']].dropna(
            subset=['UUID']).set_index('UUID')['completion_tokens'].to_dict())

        # Only assign to rows which are LLM_START; unmatched UUIDs map to NaN.
        llm_start_mask = (df['event_type'] == 'LLM_START')
        df.loc[llm_start_mask, 'NOVA-Predicted-OSL'] = (df.loc[llm_start_mask, 'UUID'].map(llm_end_map))

        # ---------------------------------------------------------------------
        # 6. NOVA-Time-To-Session-End
        #
        # For each example_number, we want the difference (in ms) between
        # the row's event_timestamp and the final (max) event_timestamp
        # in that example_number.
        # ---------------------------------------------------------------------
        max_ts_per_example = (df.groupby('example_number')['event_timestamp'].transform('max'))

        # We'll subtract row's timestamp from the max, and convert to ms
        df['NOVA-Time-To-Session-End'] = (max_ts_per_example - df['event_timestamp']) * 1000.0

        # Return the updated DataFrame
        return df