aiqtoolkit 1.2.0a20250706__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (197) hide show
  1. aiq/agent/base.py +171 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +113 -113
  4. aiq/agent/react_agent/register.py +31 -14
  5. aiq/agent/rewoo_agent/agent.py +36 -35
  6. aiq/agent/rewoo_agent/register.py +2 -2
  7. aiq/agent/tool_calling_agent/agent.py +3 -7
  8. aiq/authentication/__init__.py +14 -0
  9. aiq/authentication/api_key/__init__.py +14 -0
  10. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  11. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  12. aiq/authentication/api_key/register.py +26 -0
  13. aiq/authentication/exceptions/__init__.py +14 -0
  14. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  15. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  16. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  17. aiq/authentication/exceptions/request_exceptions.py +54 -0
  18. aiq/authentication/http_basic_auth/__init__.py +0 -0
  19. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  20. aiq/authentication/http_basic_auth/register.py +30 -0
  21. aiq/authentication/interfaces.py +93 -0
  22. aiq/authentication/oauth2/__init__.py +14 -0
  23. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  24. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  25. aiq/authentication/oauth2/register.py +25 -0
  26. aiq/authentication/register.py +21 -0
  27. aiq/builder/builder.py +64 -2
  28. aiq/builder/component_utils.py +16 -3
  29. aiq/builder/context.py +26 -0
  30. aiq/builder/eval_builder.py +43 -2
  31. aiq/builder/function.py +32 -4
  32. aiq/builder/function_base.py +1 -1
  33. aiq/builder/intermediate_step_manager.py +6 -8
  34. aiq/builder/user_interaction_manager.py +3 -0
  35. aiq/builder/workflow.py +23 -18
  36. aiq/builder/workflow_builder.py +420 -73
  37. aiq/cli/commands/info/list_mcp.py +103 -16
  38. aiq/cli/commands/sizing/__init__.py +14 -0
  39. aiq/cli/commands/sizing/calc.py +294 -0
  40. aiq/cli/commands/sizing/sizing.py +27 -0
  41. aiq/cli/commands/start.py +1 -0
  42. aiq/cli/entrypoint.py +2 -0
  43. aiq/cli/register_workflow.py +80 -0
  44. aiq/cli/type_registry.py +151 -30
  45. aiq/data_models/api_server.py +117 -11
  46. aiq/data_models/authentication.py +231 -0
  47. aiq/data_models/common.py +35 -7
  48. aiq/data_models/component.py +17 -9
  49. aiq/data_models/component_ref.py +33 -0
  50. aiq/data_models/config.py +60 -3
  51. aiq/data_models/embedder.py +1 -0
  52. aiq/data_models/function_dependencies.py +8 -0
  53. aiq/data_models/interactive.py +10 -1
  54. aiq/data_models/intermediate_step.py +15 -5
  55. aiq/data_models/its_strategy.py +30 -0
  56. aiq/data_models/llm.py +1 -0
  57. aiq/data_models/memory.py +1 -0
  58. aiq/data_models/object_store.py +44 -0
  59. aiq/data_models/retry_mixin.py +35 -0
  60. aiq/data_models/span.py +187 -0
  61. aiq/data_models/telemetry_exporter.py +2 -2
  62. aiq/embedder/nim_embedder.py +2 -1
  63. aiq/embedder/openai_embedder.py +2 -1
  64. aiq/eval/config.py +19 -1
  65. aiq/eval/dataset_handler/dataset_handler.py +75 -1
  66. aiq/eval/evaluate.py +53 -10
  67. aiq/eval/rag_evaluator/evaluate.py +23 -12
  68. aiq/eval/remote_workflow.py +7 -2
  69. aiq/eval/runners/__init__.py +14 -0
  70. aiq/eval/runners/config.py +39 -0
  71. aiq/eval/runners/multi_eval_runner.py +54 -0
  72. aiq/eval/usage_stats.py +6 -0
  73. aiq/eval/utils/weave_eval.py +5 -1
  74. aiq/experimental/__init__.py +0 -0
  75. aiq/experimental/decorators/__init__.py +0 -0
  76. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  77. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  78. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  79. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  80. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  81. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  82. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  83. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  84. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  85. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  86. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  87. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  88. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  89. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  90. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  91. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  92. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  93. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  94. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  95. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  96. aiq/experimental/inference_time_scaling/register.py +36 -0
  97. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  98. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  99. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  100. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  101. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  102. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  103. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  104. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  105. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  106. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  107. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  108. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  109. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  110. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  111. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  112. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  113. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  114. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  115. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  116. aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
  117. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  118. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  119. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
  120. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  121. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  122. aiq/front_ends/fastapi/main.py +2 -0
  123. aiq/front_ends/fastapi/message_handler.py +102 -84
  124. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  125. aiq/llm/aws_bedrock_llm.py +2 -1
  126. aiq/llm/nim_llm.py +2 -1
  127. aiq/llm/openai_llm.py +2 -1
  128. aiq/object_store/__init__.py +20 -0
  129. aiq/object_store/in_memory_object_store.py +74 -0
  130. aiq/object_store/interfaces.py +84 -0
  131. aiq/object_store/models.py +36 -0
  132. aiq/object_store/register.py +20 -0
  133. aiq/observability/__init__.py +14 -0
  134. aiq/observability/exporter/__init__.py +14 -0
  135. aiq/observability/exporter/base_exporter.py +449 -0
  136. aiq/observability/exporter/exporter.py +78 -0
  137. aiq/observability/exporter/file_exporter.py +33 -0
  138. aiq/observability/exporter/processing_exporter.py +269 -0
  139. aiq/observability/exporter/raw_exporter.py +52 -0
  140. aiq/observability/exporter/span_exporter.py +264 -0
  141. aiq/observability/exporter_manager.py +335 -0
  142. aiq/observability/mixin/__init__.py +14 -0
  143. aiq/observability/mixin/batch_config_mixin.py +26 -0
  144. aiq/observability/mixin/collector_config_mixin.py +23 -0
  145. aiq/observability/mixin/file_mixin.py +288 -0
  146. aiq/observability/mixin/file_mode.py +23 -0
  147. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  148. aiq/observability/mixin/serialize_mixin.py +61 -0
  149. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  150. aiq/observability/processor/__init__.py +14 -0
  151. aiq/observability/processor/batching_processor.py +316 -0
  152. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  153. aiq/observability/processor/processor.py +68 -0
  154. aiq/observability/register.py +32 -116
  155. aiq/observability/utils/__init__.py +14 -0
  156. aiq/observability/utils/dict_utils.py +236 -0
  157. aiq/observability/utils/time_utils.py +31 -0
  158. aiq/profiler/calc/__init__.py +14 -0
  159. aiq/profiler/calc/calc_runner.py +623 -0
  160. aiq/profiler/calc/calculations.py +288 -0
  161. aiq/profiler/calc/data_models.py +176 -0
  162. aiq/profiler/calc/plot.py +345 -0
  163. aiq/profiler/data_models.py +2 -0
  164. aiq/profiler/profile_runner.py +16 -13
  165. aiq/runtime/loader.py +8 -2
  166. aiq/runtime/runner.py +23 -9
  167. aiq/runtime/session.py +16 -5
  168. aiq/tool/chat_completion.py +74 -0
  169. aiq/tool/code_execution/README.md +152 -0
  170. aiq/tool/code_execution/code_sandbox.py +151 -72
  171. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  172. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  173. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  174. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  175. aiq/tool/code_execution/register.py +7 -3
  176. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  177. aiq/tool/mcp/exceptions.py +142 -0
  178. aiq/tool/mcp/mcp_client.py +17 -3
  179. aiq/tool/mcp/mcp_tool.py +1 -1
  180. aiq/tool/register.py +1 -0
  181. aiq/tool/server_tools.py +2 -2
  182. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  183. aiq/utils/exception_handlers/mcp.py +211 -0
  184. aiq/utils/io/model_processing.py +28 -0
  185. aiq/utils/log_utils.py +37 -0
  186. aiq/utils/string_utils.py +38 -0
  187. aiq/utils/type_converter.py +18 -2
  188. aiq/utils/type_utils.py +87 -0
  189. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
  190. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
  191. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
  192. aiq/front_ends/fastapi/websocket.py +0 -153
  193. aiq/observability/async_otel_listener.py +0 -470
  194. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
  195. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  196. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
  197. {aiqtoolkit-1.2.0a20250706.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@
14
14
  # limitations under the License.
15
15
 
16
16
  import logging
17
+ import math
17
18
  from collections.abc import Sequence
18
19
 
19
20
  from pydantic import BaseModel
@@ -53,9 +54,9 @@ class RAGEvaluator:
53
54
  if self.input_obj_field and hasattr(input_obj, self.input_obj_field):
54
55
  # If input_obj_field is specified, return the value of that field
55
56
  return str(getattr(input_obj, self.input_obj_field, ""))
56
- else:
57
- # If no input_obj_field is specified, return the string representation of the model
58
- return input_obj.model_dump_json()
57
+
58
+ # If no input_obj_field is specified, return the string representation of the model
59
+ return input_obj.model_dump_json()
59
60
 
60
61
  if isinstance(input_obj, dict):
61
62
  # If input_obj is a dict, return the JSON string representation
@@ -105,19 +106,29 @@ class RAGEvaluator:
105
106
  return EvalOutput(average_score=0.0, eval_output_items=[])
106
107
 
107
108
  scores: list[dict[str, float]] = results_dataset.scores
109
+
110
+ # If Ragas returned no scores, return empty output to avoid downstream errors
108
111
  if not scores:
109
- logger.error("Ragas returned empty score list")
112
+ logger.warning("Ragas returned empty score list")
110
113
  return EvalOutput(average_score=0.0, eval_output_items=[])
111
114
 
112
- # Convert from list of dicts to dict of lists
113
- scores_dict = {metric: [score[metric] for score in scores] for metric in scores[0]}
115
+ def _nan_to_zero(v: float | None) -> float:
116
+ """Convert NaN or None to 0.0 for safe arithmetic/serialization."""
117
+ return 0.0 if v is None or (isinstance(v, float) and math.isnan(v)) else v
118
+
119
+ # Convert from list of dicts to dict of lists, coercing NaN/None to 0.0
120
+ scores_dict = {metric: [_nan_to_zero(score.get(metric)) for score in scores] for metric in scores[0]}
121
+ first_metric_name = list(scores_dict.keys())[0] if scores_dict else None
114
122
 
115
- # Compute the average of each metric
116
- average_scores = {metric: sum(values) / len(values) for metric, values in scores_dict.items()}
123
+ # Compute the average of each metric, guarding against empty lists
124
+ average_scores = {
125
+ metric: (sum(values) / len(values) if values else 0.0)
126
+ for metric, values in scores_dict.items()
127
+ }
117
128
 
118
- # Extract the first (and only) metric's average score
119
- first_avg_score = next(iter(average_scores.values()))
120
- first_metric_name = list(scores_dict.keys())[0]
129
+ first_avg_score = average_scores.get(list(scores_dict.keys())[0], 0.0)
130
+ if isinstance(first_avg_score, float) and math.isnan(first_avg_score):
131
+ first_avg_score = 0.0
121
132
 
122
133
  df = results_dataset.to_pandas()
123
134
  # Get id from eval_input if df size matches number of eval_input_items
@@ -130,7 +141,7 @@ class RAGEvaluator:
130
141
  eval_output_items = [
131
142
  EvalOutputItem(
132
143
  id=ids[i],
133
- score=getattr(row, first_metric_name, 0.0),
144
+ score=_nan_to_zero(getattr(row, first_metric_name, 0.0) if first_metric_name else 0.0),
134
145
  reasoning={
135
146
  key:
136
147
  getattr(row, key, None) # Use getattr to safely access attributes
@@ -24,6 +24,7 @@ from tqdm import tqdm
24
24
  from aiq.data_models.api_server import AIQResponseIntermediateStep
25
25
  from aiq.data_models.intermediate_step import IntermediateStep
26
26
  from aiq.data_models.intermediate_step import IntermediateStepPayload
27
+ from aiq.data_models.invocation_node import InvocationNode
27
28
  from aiq.eval.config import EvaluationRunConfig
28
29
  from aiq.eval.evaluator.evaluator_model import EvalInput
29
30
  from aiq.eval.evaluator.evaluator_model import EvalInputItem
@@ -81,8 +82,12 @@ class EvaluationRemoteWorkflowHandler:
81
82
  step_data = json.loads(line[len(INTERMEDIATE_DATA_PREFIX):])
82
83
  response_intermediate = AIQResponseIntermediateStep.model_validate(step_data)
83
84
  # The payload is expected to be IntermediateStepPayload
84
- intermediate_step = IntermediateStep(
85
- payload=IntermediateStepPayload.model_validate_json(response_intermediate.payload))
85
+ payload = IntermediateStepPayload.model_validate_json(response_intermediate.payload)
86
+ intermediate_step = IntermediateStep(parent_id="remote",
87
+ function_ancestry=InvocationNode(
88
+ function_name=payload.name or "remote_function",
89
+ function_id=payload.UUID or "remote_function_id"),
90
+ payload=payload)
86
91
  intermediate_steps.append(intermediate_step)
87
92
  except (json.JSONDecodeError, ValidationError) as e:
88
93
  logger.error("Failed to parse intermediate step: %s", e)
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
@@ -0,0 +1,39 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import typing
17
+
18
+ from pydantic import BaseModel
19
+
20
+ from aiq.eval.config import EvaluationRunConfig
21
+ from aiq.eval.config import EvaluationRunOutput
22
+
23
+
24
class MultiEvaluationRunConfig(BaseModel):
    """
    Configuration for a multi-evaluation run.

    Carries one ``EvaluationRunConfig`` per pass, stored in a dict whose keys
    are caller-chosen ids of any type. Passes are intended to be sequential:
    each pass loads its config, applies the overrides and runs to completion
    before the next pass starts.
    """
    # Pass id -> evaluation configuration used for that pass.
    configs: dict[typing.Any, EvaluationRunConfig]
32
+
33
+
34
class MultiEvaluationRunOutput(BaseModel):
    """
    Result container for a multi-evaluation run.

    The output of every pass is collected in ``evaluation_run_outputs``.
    """
    # Pass id -> output produced by that pass.
    evaluation_run_outputs: dict[typing.Any, EvaluationRunOutput]
@@ -0,0 +1,54 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import copy
17
+ import typing
18
+
19
+ from aiq.eval.config import EvaluationRunConfig
20
+ from aiq.eval.config import EvaluationRunOutput
21
+ from aiq.eval.evaluate import EvaluationRun
22
+ from aiq.eval.runners.config import MultiEvaluationRunConfig
23
+
24
+
25
class MultiEvaluationRunner:
    """
    Executes every evaluation described by a ``MultiEvaluationRunConfig``.
    """

    def __init__(self, config: MultiEvaluationRunConfig):
        """
        Keep the multi-run configuration and start with an empty result map.
        """
        self.config = config
        # Filled in by run_all(): config id -> output of that evaluation.
        self.evaluation_run_outputs: dict[typing.Any, EvaluationRunOutput] = {}

    async def run_all(self):
        """
        Run the configured evaluations one after another.

        Each evaluation is awaited before the next one starts, and its output
        is stored under the id it was configured with. Returns the
        ``evaluation_run_outputs`` dict.
        """
        for run_id, run_config in self.config.configs.items():
            self.evaluation_run_outputs[run_id] = await self.run_single_evaluation(run_id, run_config)

        return self.evaluation_run_outputs

    async def run_single_evaluation(self, id: typing.Any, config: EvaluationRunConfig) -> EvaluationRunOutput:
        """
        Run one evaluation and return its output.

        ``id`` is accepted for the caller's bookkeeping; it is not used here.
        """
        # Evaluate a deep copy: the caller may be sharing one config object between several evaluations.
        return await EvaluationRun(copy.deepcopy(config)).run_and_evaluate()
aiq/eval/usage_stats.py CHANGED
@@ -28,8 +28,14 @@ class UsageStatsItem(BaseModel):
28
28
  usage_stats_per_llm: dict[str, UsageStatsLLM]
29
29
  total_tokens: int | None = None
30
30
  runtime: float = 0.0
31
+ min_timestamp: float = 0.0
32
+ max_timestamp: float = 0.0
33
+ llm_latency: float = 0.0
31
34
 
32
35
 
33
36
  class UsageStats(BaseModel):
34
37
  # key is the id or input_obj from EvalInputItem
38
+ min_timestamp: float = 0.0
39
+ max_timestamp: float = 0.0
40
+ total_runtime: float = 0.0
35
41
  usage_stats_items: dict[typing.Any, UsageStatsItem] = {}
@@ -152,10 +152,14 @@ class WeaveEvaluationIntegration: # pylint: disable=too-many-public-methods
152
152
  def _log_profiler_metrics(self, profiler_results: ProfilerResults, usage_stats: UsageStats) -> dict[str, Any]:
153
153
  """Log profiler metrics to Weave."""
154
154
  profile_metrics = {}
155
+ if profiler_results.llm_latency_ci:
156
+ profile_metrics["llm_latency_p95"] = profiler_results.llm_latency_ci.p95
155
157
  if profiler_results.workflow_runtime_metrics:
156
- profile_metrics["wf_p95_runtime"] = profiler_results.workflow_runtime_metrics.p95
158
+ profile_metrics["wf_runtime_p95"] = profiler_results.workflow_runtime_metrics.p95
157
159
 
158
160
  # TODO:get the LLM tokens from the usage stats and log them
161
+ profile_metrics["total_runtime"] = usage_stats.total_runtime
162
+
159
163
  return profile_metrics
160
164
 
161
165
  def log_summary(self,
File without changes
File without changes
@@ -0,0 +1,130 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import functools
17
+ import inspect
18
+ import logging
19
+ from typing import Any
20
+
21
# Module-level logger used for the experimental-feature warnings.
logger = logging.getLogger(__name__)

# Common tail of every experimental warning; callers prepend the subject.
BASE_WARNING_MESSAGE = (
    "is experimental and the API may change in future releases. "
    "Future versions may introduce breaking changes without notice."
)

# Names of functions that have already triggered a warning (warn-once bookkeeping).
_warning_issued: set[str] = set()
27
+
28
+
29
def issue_experimental_warning(function_name: str,
                               feature_name: str | None = None,
                               metadata: dict[str, Any] | None = None):
    """
    Log, at most once per ``function_name``, that a function is experimental.

    The message starts with "The <feature_name> feature" when ``feature_name``
    is truthy and with "This function" otherwise, followed by the shared
    warning text and the function name. A truthy ``metadata`` dict is appended
    to the message to give extra context (e.g., version, author, feature flag).
    """
    # Warn-once: a function that has already been reported stays silent.
    if function_name in _warning_issued:
        return

    subject = f"The {feature_name} feature" if feature_name else "This function"
    message = f"{subject} {BASE_WARNING_MESSAGE} Function: {function_name}"
    if metadata:
        message = f"{message} | Metadata: {metadata}"

    logger.warning(message)

    # Remember the name only after the warning went out, so later calls are no-ops.
    _warning_issued.add(function_name)
54
+
55
+
56
+ def aiq_experimental(func: Any = None, *, feature_name: str | None = None, metadata: dict[str, Any] | None = None):
57
+ """
58
+ Decorator that can wrap any type of function (sync, async, generator,
59
+ async generator) and logs a warning that the function is experimental.
60
+
61
+ Args:
62
+ func: The function to be decorated.
63
+ feature_name: Optional name of the feature that is experimental. If provided, the warning will be
64
+ prefixed with "The <feature_name> feature is experimental".
65
+ metadata: Optional dictionary of metadata to log with the warning. This can include information
66
+ like version, author, etc. If provided, the metadata will be
67
+ logged alongside the experimental warning.
68
+ """
69
+ function_name: str = f"{func.__module__}.{func.__qualname__}" if func else "<unknown_function>"
70
+
71
+ # If called as @track_function(...) but not immediately passed a function
72
+ if func is None:
73
+
74
+ def decorator_wrapper(actual_func):
75
+ return aiq_experimental(actual_func, feature_name=feature_name, metadata=metadata)
76
+
77
+ return decorator_wrapper
78
+
79
+ # --- Validate metadata ---
80
+ if metadata is not None:
81
+ if not isinstance(metadata, dict):
82
+ raise TypeError("metadata must be a dict[str, Any].")
83
+ if any(not isinstance(k, str) for k in metadata.keys()):
84
+ raise TypeError("All metadata keys must be strings.")
85
+
86
+ # --- Now detect the function type and wrap accordingly ---
87
+ if inspect.isasyncgenfunction(func):
88
+ # ---------------------
89
+ # ASYNC GENERATOR
90
+ # ---------------------
91
+
92
+ @functools.wraps(func)
93
+ async def async_gen_wrapper(*args, **kwargs):
94
+ issue_experimental_warning(function_name, feature_name, metadata)
95
+ async for item in func(*args, **kwargs):
96
+ yield item # yield the original item
97
+
98
+ return async_gen_wrapper
99
+
100
+ if inspect.iscoroutinefunction(func):
101
+ # ---------------------
102
+ # ASYNC FUNCTION
103
+ # ---------------------
104
+ @functools.wraps(func)
105
+ async def async_wrapper(*args, **kwargs):
106
+ issue_experimental_warning(function_name, feature_name, metadata)
107
+ result = await func(*args, **kwargs)
108
+ return result
109
+
110
+ return async_wrapper
111
+
112
+ if inspect.isgeneratorfunction(func):
113
+ # ---------------------
114
+ # SYNC GENERATOR
115
+ # ---------------------
116
+ @functools.wraps(func)
117
+ def sync_gen_wrapper(*args, **kwargs):
118
+ issue_experimental_warning(function_name, feature_name, metadata)
119
+ for item in func(*args, **kwargs):
120
+ yield item # yield the original item
121
+
122
+ return sync_gen_wrapper
123
+
124
+ @functools.wraps(func)
125
+ def sync_wrapper(*args, **kwargs):
126
+ issue_experimental_warning(function_name, feature_name, metadata)
127
+ result = func(*args, **kwargs)
128
+ return result
129
+
130
+ return sync_wrapper
File without changes
@@ -0,0 +1,147 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import asyncio
17
+ import logging
18
+ import re
19
+
20
+ from aiq.builder.builder import Builder
21
+ from aiq.builder.framework_enum import LLMFrameworkEnum
22
+ from aiq.cli.register_workflow import register_its_strategy
23
+ from aiq.data_models.its_strategy import ITSStrategyBaseConfig
24
+ from aiq.experimental.inference_time_scaling.models.editor_config import IterativePlanRefinementConfig
25
+ from aiq.experimental.inference_time_scaling.models.its_item import ITSItem
26
+ from aiq.experimental.inference_time_scaling.models.stage_enums import PipelineTypeEnum
27
+ from aiq.experimental.inference_time_scaling.models.stage_enums import StageTypeEnum
28
+ from aiq.experimental.inference_time_scaling.models.strategy_base import StrategyBase
29
+ from aiq.utils.io.model_processing import remove_r1_think_tags
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class IterativePlanRefinementEditor(StrategyBase):
    """
    An editing-stage strategy that takes existing plans and refines each of
    them multiple times using the same LLM. Each iteration updates the plan to
    (hopefully) be better.
    """

    def __init__(self, config: ITSStrategyBaseConfig) -> None:
        super().__init__(config)
        # Set by build_components(); refine_single() rejects the editor until then.
        self.llm_bound = None

    def supported_pipeline_types(self) -> list[PipelineTypeEnum]:
        """Return the pipeline types this editor supports (planning only)."""
        return [PipelineTypeEnum.PLANNING]

    def stage_type(self) -> StageTypeEnum:
        """Return the stage this strategy belongs to (editing)."""
        return StageTypeEnum.EDITING

    async def build_components(self, builder: Builder) -> None:
        """
        Build the components required for the iterative editor.

        Resolves ``config.editor_llm`` through the builder as a LangChain-wrapped LLM.
        """
        logger.debug("Building components for IterativePlanRefinementEditor")
        self.llm_bound = await builder.get_llm(self.config.editor_llm, wrapper_type=LLMFrameworkEnum.LANGCHAIN)

    async def refine_single(self, prompt: str, context: str, its_item: ITSItem, prompt_idx: int) -> ITSItem:
        """
        Refine the plan of a single item ``config.num_iterations`` times.

        Args:
            prompt: The original prompt the plan was generated for.
            context: The agent context passed to the refinement template.
            its_item: The item whose ``plan`` is refined; it is updated in place.
            prompt_idx: Index of the item, used only in log messages.

        Returns:
            ITSItem: The same ``its_item`` with ``plan`` set to the final plan.

        Raises:
            ValueError: If the editor LLM is not a ``BaseChatModel`` instance.
        """
        # Imported lazily, as in the rest of this module, so langchain is only needed when refining.
        from langchain_core.language_models import BaseChatModel
        from langchain_core.prompts import PromptTemplate

        if not isinstance(self.llm_bound, BaseChatModel):
            raise ValueError("editor_llm must be a BaseChatModel instance for iterative plan refinement.")

        llm: BaseChatModel = self.llm_bound

        # Refinement loop
        refinement_template = PromptTemplate(
            template=self.config.refinement_template,
            input_variables=["current_plan", "context", "original_prompt"],
            validate_template=True,
        )

        current_plan = its_item.plan
        for iteration in range(1, self.config.num_iterations + 1):
            logger.info("Refinement iteration %d / %d for prompt %d", iteration, self.config.num_iterations, prompt_idx)
            refine_prompt = (await refinement_template.ainvoke({
                "current_plan": current_plan, "context": context, "original_prompt": prompt
            })).to_string()

            refine_response = await llm.ainvoke(refine_prompt)
            refined_plan = remove_r1_think_tags(
                refine_response.content if hasattr(refine_response, 'content') else str(refine_response))
            # Drop a leading "EDITED PLAN:" label (any case) if the model echoed it.
            refined_plan = re.sub(r'(?i)^\s*EDITED PLAN:\s*', '', refined_plan).strip()
            if refined_plan:
                current_plan = refined_plan
            else:
                # An empty refinement must not wipe out the plan we already have.
                logger.warning("Refinement iteration %d for prompt %d produced an empty plan; keeping existing plan.",
                               iteration,
                               prompt_idx)

        logger.info("IterativePlanRefinementEditor produced a final plan after %d iterations.",
                    self.config.num_iterations)

        its_item.plan = current_plan
        # Return a single final plan
        return its_item

    async def ainvoke(self,
                      items: list[ITSItem],
                      original_prompt: str | None = None,
                      agent_context: str | None = None,
                      **kwargs) -> list[ITSItem]:
        """
        Runs the iterative plan refinement process on the provided planning items.

        The items are refined concurrently; each one goes through the configured
        number of refinement iterations (``config.num_iterations``).

        Args:
            items (list[ITSItem]): The planning items to refine.
            original_prompt (str): The original prompt used to generate the plans. Required.
            agent_context (str): The context for the agent. Required.

        Returns:
            list[ITSItem]: The refined planning items, in the same order as ``items``.

        Raises:
            ValueError: If ``original_prompt`` or ``agent_context`` is missing or empty.
        """

        if not original_prompt or not agent_context:
            raise ValueError("Arguments original_prompt and agent_context must be provided.")

        # One refinement coroutine per planning item; prompt_idx is 1-based for logging.
        tasks = [
            self.refine_single(prompt=original_prompt, context=agent_context, its_item=item, prompt_idx=i + 1)
            for i, item in enumerate(items)
        ]

        # Run the tasks concurrently and gather results
        refined_planning_items = await asyncio.gather(*tasks)

        return refined_planning_items
130
+
131
+
132
@register_its_strategy(config_type=IterativePlanRefinementConfig)
async def register_iterative_plan_refinement_editor(config: IterativePlanRefinementConfig, builder: Builder):
    """
    Create an IterativePlanRefinementEditor for ``config`` and yield it.

    Args:
        config (IterativePlanRefinementConfig): The configuration for the strategy.
        builder (Builder): The builder handed to the editor's ``build_components``.

    Yields:
        IterativePlanRefinementEditor: The editor, after its components have been built.
    """
    strategy = IterativePlanRefinementEditor(config)
    await strategy.build_components(builder=builder)

    yield strategy