PyPI - aiqtoolkit - Versions diffs - 1.2.0.dev0__py3-none-any.whl → 1.2.0rc2__py3-none-any.whl - Mend

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (220) hide show

aiq/agent/base.py +170 -8
aiq/agent/dual_node.py +1 -1
aiq/agent/react_agent/agent.py +146 -112
aiq/agent/react_agent/prompt.py +1 -6
aiq/agent/react_agent/register.py +36 -35
aiq/agent/rewoo_agent/agent.py +36 -35
aiq/agent/rewoo_agent/register.py +2 -2
aiq/agent/tool_calling_agent/agent.py +3 -7
aiq/agent/tool_calling_agent/register.py +1 -1
aiq/authentication/__init__.py +14 -0
aiq/authentication/api_key/__init__.py +14 -0
aiq/authentication/api_key/api_key_auth_provider.py +92 -0
aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
aiq/authentication/api_key/register.py +26 -0
aiq/authentication/exceptions/__init__.py +14 -0
aiq/authentication/exceptions/api_key_exceptions.py +38 -0
aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
aiq/authentication/exceptions/call_back_exceptions.py +38 -0
aiq/authentication/exceptions/request_exceptions.py +54 -0
aiq/authentication/http_basic_auth/__init__.py +0 -0
aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
aiq/authentication/http_basic_auth/register.py +30 -0
aiq/authentication/interfaces.py +93 -0
aiq/authentication/oauth2/__init__.py +14 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
aiq/authentication/oauth2/register.py +25 -0
aiq/authentication/register.py +21 -0
aiq/builder/builder.py +64 -2
aiq/builder/component_utils.py +16 -3
aiq/builder/context.py +37 -0
aiq/builder/eval_builder.py +43 -2
aiq/builder/function.py +44 -12
aiq/builder/function_base.py +1 -1
aiq/builder/intermediate_step_manager.py +6 -8
aiq/builder/user_interaction_manager.py +3 -0
aiq/builder/workflow.py +23 -18
aiq/builder/workflow_builder.py +421 -61
aiq/cli/commands/info/list_mcp.py +103 -16
aiq/cli/commands/sizing/__init__.py +14 -0
aiq/cli/commands/sizing/calc.py +294 -0
aiq/cli/commands/sizing/sizing.py +27 -0
aiq/cli/commands/start.py +2 -1
aiq/cli/entrypoint.py +2 -0
aiq/cli/register_workflow.py +80 -0
aiq/cli/type_registry.py +151 -30
aiq/data_models/api_server.py +124 -12
aiq/data_models/authentication.py +231 -0
aiq/data_models/common.py +35 -7
aiq/data_models/component.py +17 -9
aiq/data_models/component_ref.py +33 -0
aiq/data_models/config.py +60 -3
aiq/data_models/dataset_handler.py +2 -1
aiq/data_models/embedder.py +1 -0
aiq/data_models/evaluate.py +23 -0
aiq/data_models/function_dependencies.py +8 -0
aiq/data_models/interactive.py +10 -1
aiq/data_models/intermediate_step.py +38 -5
aiq/data_models/its_strategy.py +30 -0
aiq/data_models/llm.py +1 -0
aiq/data_models/memory.py +1 -0
aiq/data_models/object_store.py +44 -0
aiq/data_models/profiler.py +1 -0
aiq/data_models/retry_mixin.py +35 -0
aiq/data_models/span.py +187 -0
aiq/data_models/telemetry_exporter.py +2 -2
aiq/embedder/nim_embedder.py +2 -1
aiq/embedder/openai_embedder.py +2 -1
aiq/eval/config.py +19 -1
aiq/eval/dataset_handler/dataset_handler.py +87 -2
aiq/eval/evaluate.py +208 -27
aiq/eval/evaluator/base_evaluator.py +73 -0
aiq/eval/evaluator/evaluator_model.py +1 -0
aiq/eval/intermediate_step_adapter.py +11 -5
aiq/eval/rag_evaluator/evaluate.py +55 -15
aiq/eval/rag_evaluator/register.py +6 -1
aiq/eval/remote_workflow.py +7 -2
aiq/eval/runners/__init__.py +14 -0
aiq/eval/runners/config.py +39 -0
aiq/eval/runners/multi_eval_runner.py +54 -0
aiq/eval/trajectory_evaluator/evaluate.py +22 -65
aiq/eval/tunable_rag_evaluator/evaluate.py +150 -168
aiq/eval/tunable_rag_evaluator/register.py +2 -0
aiq/eval/usage_stats.py +41 -0
aiq/eval/utils/output_uploader.py +10 -1
aiq/eval/utils/weave_eval.py +184 -0
aiq/experimental/__init__.py +0 -0
aiq/experimental/decorators/__init__.py +0 -0
aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
aiq/experimental/inference_time_scaling/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
aiq/experimental/inference_time_scaling/register.py +36 -0
aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
aiq/front_ends/console/authentication_flow_handler.py +233 -0
aiq/front_ends/console/console_front_end_plugin.py +11 -2
aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
aiq/front_ends/fastapi/fastapi_front_end_config.py +93 -9
aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +537 -52
aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
aiq/front_ends/fastapi/job_store.py +47 -25
aiq/front_ends/fastapi/main.py +2 -0
aiq/front_ends/fastapi/message_handler.py +108 -89
aiq/front_ends/fastapi/step_adaptor.py +2 -1
aiq/llm/aws_bedrock_llm.py +57 -0
aiq/llm/nim_llm.py +2 -1
aiq/llm/openai_llm.py +3 -2
aiq/llm/register.py +1 -0
aiq/meta/pypi.md +12 -12
aiq/object_store/__init__.py +20 -0
aiq/object_store/in_memory_object_store.py +74 -0
aiq/object_store/interfaces.py +84 -0
aiq/object_store/models.py +36 -0
aiq/object_store/register.py +20 -0
aiq/observability/__init__.py +14 -0
aiq/observability/exporter/__init__.py +14 -0
aiq/observability/exporter/base_exporter.py +449 -0
aiq/observability/exporter/exporter.py +78 -0
aiq/observability/exporter/file_exporter.py +33 -0
aiq/observability/exporter/processing_exporter.py +269 -0
aiq/observability/exporter/raw_exporter.py +52 -0
aiq/observability/exporter/span_exporter.py +264 -0
aiq/observability/exporter_manager.py +335 -0
aiq/observability/mixin/__init__.py +14 -0
aiq/observability/mixin/batch_config_mixin.py +26 -0
aiq/observability/mixin/collector_config_mixin.py +23 -0
aiq/observability/mixin/file_mixin.py +288 -0
aiq/observability/mixin/file_mode.py +23 -0
aiq/observability/mixin/resource_conflict_mixin.py +134 -0
aiq/observability/mixin/serialize_mixin.py +61 -0
aiq/observability/mixin/type_introspection_mixin.py +183 -0
aiq/observability/processor/__init__.py +14 -0
aiq/observability/processor/batching_processor.py +316 -0
aiq/observability/processor/intermediate_step_serializer.py +28 -0
aiq/observability/processor/processor.py +68 -0
aiq/observability/register.py +36 -39
aiq/observability/utils/__init__.py +14 -0
aiq/observability/utils/dict_utils.py +236 -0
aiq/observability/utils/time_utils.py +31 -0
aiq/profiler/calc/__init__.py +14 -0
aiq/profiler/calc/calc_runner.py +623 -0
aiq/profiler/calc/calculations.py +288 -0
aiq/profiler/calc/data_models.py +176 -0
aiq/profiler/calc/plot.py +345 -0
aiq/profiler/callbacks/langchain_callback_handler.py +22 -10
aiq/profiler/data_models.py +24 -0
aiq/profiler/inference_metrics_model.py +3 -0
aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +8 -0
aiq/profiler/inference_optimization/data_models.py +2 -2
aiq/profiler/inference_optimization/llm_metrics.py +2 -2
aiq/profiler/profile_runner.py +61 -21
aiq/runtime/loader.py +9 -3
aiq/runtime/runner.py +23 -9
aiq/runtime/session.py +25 -7
aiq/runtime/user_metadata.py +2 -3
aiq/tool/chat_completion.py +74 -0
aiq/tool/code_execution/README.md +152 -0
aiq/tool/code_execution/code_sandbox.py +151 -72
aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
aiq/tool/code_execution/register.py +7 -3
aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
aiq/tool/mcp/exceptions.py +142 -0
aiq/tool/mcp/mcp_client.py +41 -6
aiq/tool/mcp/mcp_tool.py +3 -2
aiq/tool/register.py +1 -0
aiq/tool/server_tools.py +6 -3
aiq/utils/exception_handlers/automatic_retries.py +289 -0
aiq/utils/exception_handlers/mcp.py +211 -0
aiq/utils/io/model_processing.py +28 -0
aiq/utils/log_utils.py +37 -0
aiq/utils/string_utils.py +38 -0
aiq/utils/type_converter.py +18 -2
aiq/utils/type_utils.py +87 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/METADATA +53 -21
aiqtoolkit-1.2.0rc2.dist-info/RECORD +436 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/WHEEL +1 -1
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/entry_points.txt +3 -0
aiq/front_ends/fastapi/websocket.py +0 -148
aiq/observability/async_otel_listener.py +0 -429
aiqtoolkit-1.2.0.dev0.dist-info/RECORD +0 -316
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/licenses/LICENSE.md +0 -0
{aiqtoolkit-1.2.0.dev0.dist-info → aiqtoolkit-1.2.0rc2.dist-info}/top_level.txt +0 -0

aiq/data_models/interactive.py CHANGED Viewed

@@ -33,6 +33,7 @@ class HumanPromptModelType(str, Enum):
     RADIO = "radio"
     CHECKBOX = "checkbox"
     DROPDOWN = "dropdown"
+    OAUTH_CONSENT = "oauth_consent"
 class BinaryChoiceOptionsType(str, Enum):
@@ -145,6 +146,14 @@ class HumanPromptNotification(HumanPromptBase):
     input_type: typing.Literal[HumanPromptModelType.NOTIFICATION] = HumanPromptModelType.NOTIFICATION
+class _HumanPromptOAuthConsent(HumanPromptBase):
+    """
+    Represents an OAuth consent prompt interaction used to notify the UI to open the authentication page for completing
+    the consent flow.
+    """
+    input_type: typing.Literal[HumanPromptModelType.OAUTH_CONSENT] = HumanPromptModelType.OAUTH_CONSENT
 class HumanPromptBinary(HumanPromptBase):
     """
     Represents a binary interaction.
@@ -190,7 +199,7 @@ class HumanPromptDropdown(HumanPromptMultipleChoiceBase):
 HumanPrompt = typing.Annotated[HumanPromptText | HumanPromptNotification | HumanPromptBinary | HumanPromptRadio
-                               | HumanPromptCheckbox | HumanPromptDropdown,
+                               | HumanPromptCheckbox | HumanPromptDropdown | _HumanPromptOAuthConsent,
                                Discriminator("input_type")]

aiq/data_models/intermediate_step.py CHANGED Viewed

@@ -17,6 +17,7 @@ import time
 import typing
 import uuid
 from enum import Enum
+from typing import Literal
 from pydantic import BaseModel
 from pydantic import ConfigDict
@@ -82,6 +83,26 @@ class UsageInfo(BaseModel):
     seconds_between_calls: int = 0
+class ToolParameters(BaseModel):
+    properties: dict[str, typing.Any] = Field(..., description="The properties of the function parameters.")
+    required: list[str] = Field(default_factory=list, description="The required properties of the function parameters.")
+    type_: Literal["object"] = Field(default="object", description="The type of the function parameters.", alias="type")
+    additionalProperties: bool = Field(default=False,
+                                       description="Enable function parameters allow additional properties.")
+    strict: bool = Field(default=True, description="Ensure function calls reliably adhere to the function schema.")
+class ToolDetails(BaseModel):
+    name: str = Field(..., description="The name of the function.")
+    description: str = Field(..., description="The description of the function.")
+    parameters: ToolParameters = Field(..., description="The parameters of the function.")
+class ToolSchema(BaseModel):
+    type: Literal["function"] = Field(..., description="The type of the tool.")
+    function: ToolDetails = Field(..., description="The function details.")
 class TraceMetadata(BaseModel):
     chat_responses: typing.Any | None = None
     chat_inputs: typing.Any | None = None
@@ -91,6 +112,8 @@ class TraceMetadata(BaseModel):
     span_inputs: typing.Any | None = None
     span_outputs: typing.Any | None = None
     provided_metadata: typing.Any | None = None
+    tools_schema: list[ToolSchema] = Field(default_factory=list,
+                                           description="The schema of tools used in a tool calling request.")
     # Allow extra fields in the model_config to support derived models
     model_config = ConfigDict(extra="allow")
@@ -211,9 +234,23 @@ class IntermediateStep(BaseModel):
     # Forbid extra fields on this model (extra="forbid" below)
     model_config = ConfigDict(extra="forbid")
-    function_ancestry: InvocationNode | None = InvocationNode(function_name="N/A", function_id="N/A")
+    parent_id: str
+    """
+    The parent step ID for the current step. The parent ID is the ID of the last START step which has a different UUID
+    than the current step. This value is different from the function_ancestry.parent_id value which tracks the last
+    parent FUNCTION step. For the first START step, the parent_id is 'root'.
+    """
+    function_ancestry: InvocationNode
+    """
+    The function ancestry for the current step showing the current AIQ function that was being executed when the step
+    was created.
+    """
     payload: IntermediateStepPayload
+    """
+    The payload for the current step.
+    """
     # ===== Payload Properties =====
     @property
@@ -263,7 +300,3 @@ class IntermediateStep(BaseModel):
     @property
     def event_state(self) -> IntermediateStepState:
         return self.payload.event_state
-    @property
-    def parent_id(self) -> str | None:
-        return self.function_ancestry.function_id if self.function_ancestry else None

aiq/data_models/its_strategy.py ADDED Viewed

@@ -0,0 +1,30 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import typing
+from .common import BaseModelRegistryTag
+from .common import TypedBaseModel
+class ITSStrategyBaseConfig(TypedBaseModel, BaseModelRegistryTag):
+    """
+    Base configuration class for Inference Time Scaling (ITS) strategy.
+    This class is used to define the structure of ITS strategy configurations.
+    """
+    pass
+ITSStrategyBaseConfigT = typing.TypeVar("ITSStrategyBaseConfigT", bound=ITSStrategyBaseConfig)

aiq/data_models/llm.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .common import TypedBaseModel
 class LLMBaseConfig(TypedBaseModel, BaseModelRegistryTag):
+    """Base configuration for LLM providers."""
     pass

aiq/data_models/memory.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .common import TypedBaseModel
 class MemoryBaseConfig(TypedBaseModel, BaseModelRegistryTag):
+    """ The base level config object for a memory object. Memories provide an interface for storing and retrieving. """
     pass

aiq/data_models/object_store.py ADDED Viewed

@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import typing
+from .common import BaseModelRegistryTag
+from .common import TypedBaseModel
+class ObjectStoreBaseConfig(TypedBaseModel, BaseModelRegistryTag):
+    pass
+ObjectStoreBaseConfigT = typing.TypeVar("ObjectStoreBaseConfigT", bound=ObjectStoreBaseConfig)
+class KeyAlreadyExistsError(Exception):
+    def __init__(self, key: str, additional_message: str | None = None):
+        parts = [f"Key already exists: {key}."]
+        if additional_message:
+            parts.append(additional_message)
+        super().__init__(" ".join(parts))
+class NoSuchKeyError(Exception):
+    def __init__(self, key: str, additional_message: str | None = None):
+        parts = [f"No object found with key: {key}."]
+        if additional_message:
+            parts.append(additional_message)
+        super().__init__(" ".join(parts))

aiq/data_models/profiler.py CHANGED Viewed

@@ -42,6 +42,7 @@ class PrefixSpanConfig(BaseModel):
 class ProfilerConfig(BaseModel):
+    base_metrics: bool = False
     token_usage_forecast: bool = False
     token_uniqueness_forecast: bool = False
     workflow_runtime_forecast: bool = False

aiq/data_models/retry_mixin.py ADDED Viewed

@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pydantic import BaseModel
+from pydantic import Field
+class RetryMixin(BaseModel):
+    """Mixin class for retry configuration."""
+    do_auto_retry: bool = Field(default=True,
+                                description="Whether to automatically retry method calls"
+                                " that fail with a retryable error.",
+                                exclude=True)
+    num_retries: int = Field(default=5,
+                             description="Number of times to retry a method call that fails"
+                             " with a retryable error.",
+                             exclude=True)
+    retry_on_status_codes: list[int | str] = Field(default_factory=lambda: [429, 500, 502, 503, 504],
+                                                   description="List of HTTP status codes that should trigger a retry.",
+                                                   exclude=True)
+    retry_on_errors: list[str] | None = Field(default_factory=lambda: ["Too Many Requests"],
+                                              description="List of error substrings that should trigger a retry.",
+                                              exclude=True)

aiq/data_models/span.py ADDED Viewed

@@ -0,0 +1,187 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+import time
+import uuid
+from enum import Enum
+from typing import Any
+from pydantic import BaseModel
+from pydantic import Field
+from pydantic import field_validator
+logger = logging.getLogger(__name__)
+class SpanKind(Enum):
+    LLM = "LLM"
+    TOOL = "TOOL"
+    WORKFLOW = "WORKFLOW"
+    TASK = "TASK"
+    FUNCTION = "FUNCTION"
+    CUSTOM = "CUSTOM"
+    SPAN = "SPAN"
+    EMBEDDER = "EMBEDDER"
+    RETRIEVER = "RETRIEVER"
+    AGENT = "AGENT"
+    RERANKER = "RERANKER"
+    GUARDRAIL = "GUARDRAIL"
+    EVALUATOR = "EVALUATOR"
+    UNKNOWN = "UNKNOWN"
+EVENT_TYPE_TO_SPAN_KIND_MAP = {
+    "LLM_START": SpanKind.LLM,
+    "LLM_END": SpanKind.LLM,
+    "LLM_NEW_TOKEN": SpanKind.LLM,
+    "TOOL_START": SpanKind.TOOL,
+    "TOOL_END": SpanKind.TOOL,
+    "WORKFLOW_START": SpanKind.WORKFLOW,
+    "WORKFLOW_END": SpanKind.WORKFLOW,
+    "TASK_START": SpanKind.TASK,
+    "TASK_END": SpanKind.TASK,
+    "FUNCTION_START": SpanKind.FUNCTION,
+    "FUNCTION_END": SpanKind.FUNCTION,
+    "CUSTOM_START": SpanKind.CUSTOM,
+    "CUSTOM_END": SpanKind.CUSTOM,
+    "SPAN_START": SpanKind.SPAN,
+    "SPAN_END": SpanKind.SPAN,
+    "EMBEDDER_START": SpanKind.EMBEDDER,
+    "EMBEDDER_END": SpanKind.EMBEDDER,
+    "RETRIEVER_START": SpanKind.RETRIEVER,
+    "RETRIEVER_END": SpanKind.RETRIEVER,
+    "AGENT_START": SpanKind.AGENT,
+    "AGENT_END": SpanKind.AGENT,
+    "RERANKER_START": SpanKind.RERANKER,
+    "RERANKER_END": SpanKind.RERANKER,
+    "GUARDRAIL_START": SpanKind.GUARDRAIL,
+    "GUARDRAIL_END": SpanKind.GUARDRAIL,
+    "EVALUATOR_START": SpanKind.EVALUATOR,
+    "EVALUATOR_END": SpanKind.EVALUATOR,
+}
+def event_type_to_span_kind(event_type: str) -> SpanKind:
+    """Convert an event type to a span kind.
+    Args:
+        event_type (str): The event type to convert.
+    Returns:
+        SpanKind: The span kind.
+    """
+    return EVENT_TYPE_TO_SPAN_KIND_MAP.get(event_type, SpanKind.UNKNOWN)
+class SpanAttributes(Enum):
+    AIQ_SPAN_KIND = "aiq.span.kind"
+    INPUT_VALUE = "input.value"
+    INPUT_MIME_TYPE = "input.mime_type"
+    LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
+    LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
+    LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
+    OUTPUT_VALUE = "output.value"
+    OUTPUT_MIME_TYPE = "output.mime_type"
+    AIQ_USAGE_NUM_LLM_CALLS = "aiq.usage.num_llm_calls"
+    AIQ_USAGE_SECONDS_BETWEEN_CALLS = "aiq.usage.seconds_between_calls"
+    AIQ_USAGE_TOKEN_COUNT_PROMPT = "aiq.usage.token_count.prompt"
+    AIQ_USAGE_TOKEN_COUNT_COMPLETION = "aiq.usage.token_count.completion"
+    AIQ_USAGE_TOKEN_COUNT_TOTAL = "aiq.usage.token_count.total"
+    AIQ_EVENT_TYPE = "aiq.event_type"
+class MimeTypes(Enum):
+    TEXT = "text/plain"
+    JSON = "application/json"
+class SpanStatusCode(Enum):
+    OK = "OK"
+    ERROR = "ERROR"
+    UNSET = "UNSET"
+class SpanEvent(BaseModel):
+    timestamp: float = Field(default_factory=lambda: int(time.time() * 1e9), description="The timestamp of the event.")
+    name: str = Field(description="The name of the event.")
+    attributes: dict[str, Any] = Field(default_factory=dict, description="The attributes of the event.")
+class SpanStatus(BaseModel):
+    code: SpanStatusCode = Field(default=SpanStatusCode.OK, description="The status code of the span.")
+    message: str | None = Field(default=None, description="The status message of the span.")
+class SpanContext(BaseModel):
+    trace_id: int = Field(default_factory=lambda: uuid.uuid4().int, description="The 128-bit trace ID of the span.")
+    span_id: int = Field(default_factory=lambda: uuid.uuid4().int & ((1 << 64) - 1),
+                         description="The 64-bit span ID of the span.")
+class Span(BaseModel):
+    name: str = Field(description="The name of the span.")
+    context: SpanContext | None = Field(default=None, description="The context of the span.")
+    parent: "Span | None" = Field(default=None, description="The parent span of the span.")
+    start_time: int = Field(default_factory=lambda: int(time.time() * 1e9), description="The start time of the span.")
+    end_time: int | None = Field(default=None, description="The end time of the span.")
+    attributes: dict[str, Any] = Field(default_factory=dict, description="The attributes of the span.")
+    events: list[SpanEvent] = Field(default_factory=list, description="The events of the span.")
+    status: SpanStatus = Field(default_factory=SpanStatus, description="The status of the span.")
+    @field_validator('context', mode='before')
+    @classmethod
+    def set_default_context(cls, v: SpanContext | None) -> SpanContext:
+        """Set the default context if the context is not provided.
+        Args:
+            v (SpanContext | None): The context to set.
+        Returns:
+            SpanContext: The context.
+        """
+        if v is None:
+            return SpanContext()
+        return v
+    def set_attribute(self, key: str, value: Any) -> None:
+        """Set the attribute of the span.
+        Args:
+            key (str): The key of the attribute.
+            value (Any): The value of the attribute.
+        """
+        self.attributes[key] = value
+    def add_event(self, name: str, attributes: dict[str, Any] | None = None) -> None:
+        """Add an event to the span.
+        Args:
+            name (str): The name of the event.
+            attributes (dict[str, Any] | None): The attributes of the event.
+        """
+        if attributes is None:
+            attributes = {}
+        self.events = self.events + [SpanEvent(name=name, attributes=attributes)]
+    def end(self, end_time: int | None = None) -> None:
+        """End the span.
+        Args:
+            end_time (int | None): The end time of the span.
+        """
+        if end_time is None:
+            end_time = int(time.time() * 1e9)
+        self.end_time = end_time

aiq/data_models/telemetry_exporter.py CHANGED Viewed

@@ -15,8 +15,8 @@
 import typing
-from .common import BaseModelRegistryTag
-from .common import TypedBaseModel
+from aiq.data_models.common import BaseModelRegistryTag
+from aiq.data_models.common import TypedBaseModel
 class TelemetryExporterBaseConfig(TypedBaseModel, BaseModelRegistryTag):

aiq/embedder/nim_embedder.py CHANGED Viewed

@@ -24,6 +24,7 @@ from aiq.builder.builder import Builder
 from aiq.builder.embedder import EmbedderProviderInfo
 from aiq.cli.register_workflow import register_embedder_provider
 from aiq.data_models.embedder import EmbedderBaseConfig
+from aiq.data_models.retry_mixin import RetryMixin
 allowed_truncate_values = ["NONE", "START", "END"]
@@ -37,7 +38,7 @@ def option_in_allowed_values(v):
 TruncationOption = typing.Annotated[str, AfterValidator(option_in_allowed_values)]
-class NIMEmbedderModelConfig(EmbedderBaseConfig, name="nim"):
+class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
     """A NVIDIA Inference Microservice (NIM) embedder provider to be used with an embedder client."""
     api_key: str | None = Field(default=None, description="NVIDIA API key to interact with hosted NIM.")

aiq/embedder/openai_embedder.py CHANGED Viewed

@@ -21,9 +21,10 @@ from aiq.builder.builder import Builder
 from aiq.builder.embedder import EmbedderProviderInfo
 from aiq.cli.register_workflow import register_embedder_provider
 from aiq.data_models.embedder import EmbedderBaseConfig
+from aiq.data_models.retry_mixin import RetryMixin
-class OpenAIEmbedderModelConfig(EmbedderBaseConfig, name="openai"):
+class OpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="openai"):
     """An OpenAI LLM provider to be used with an LLM client."""
     model_config = ConfigDict(protected_namespaces=())

aiq/eval/config.py CHANGED Viewed

@@ -17,13 +17,18 @@ from pathlib import Path
 from pydantic import BaseModel
+from aiq.eval.evaluator.evaluator_model import EvalInput
+from aiq.eval.evaluator.evaluator_model import EvalOutput
+from aiq.eval.usage_stats import UsageStats
+from aiq.profiler.data_models import ProfilerResults
 class EvaluationRunConfig(BaseModel):
     """
     Parameters used for a single evaluation run.
     """
     config_file: Path
-    dataset: str | None  # dataset file path can be specified in the config file
+    dataset: str | None = None  # dataset file path can be specified in the config file
     result_json_path: str = "$"
     skip_workflow: bool = False
     skip_completed_entries: bool = False
@@ -31,6 +36,14 @@ class EvaluationRunConfig(BaseModel):
     endpoint_timeout: int = 300
     reps: int = 1
     override: tuple[tuple[str, str], ...] = ()
+    # If false, the output will not be written to the output directory. This is
+    # useful when running evaluation via another tool.
+    write_output: bool = True
+    # if true, the dataset is adjusted to a multiple of the concurrency
+    adjust_dataset_size: bool = False
+    # number of passes at each concurrency, if 0 the dataset is adjusted to a multiple of the
+    # concurrency. This is only used if adjust_dataset_size is true
+    num_passes: int = 0
 class EvaluationRunOutput(BaseModel):
@@ -40,3 +53,8 @@ class EvaluationRunOutput(BaseModel):
     workflow_output_file: Path | None
     evaluator_output_files: list[Path]
     workflow_interrupted: bool
+    eval_input: EvalInput
+    evaluation_results: list[tuple[str, EvalOutput]]
+    usage_stats: UsageStats | None = None
+    profiler_results: ProfilerResults

aiq/eval/dataset_handler/dataset_handler.py CHANGED Viewed

@@ -14,6 +14,7 @@
 # limitations under the License.
 import json
+import math
 import pandas as pd
@@ -33,12 +34,23 @@ class DatasetHandler:
     One DatasetHandler object is needed for each dataset to be evaluated.
     """
-    def __init__(self, dataset_config: EvalDatasetConfig, reps: int):
+    def __init__(self,
+                 dataset_config: EvalDatasetConfig,
+                 reps: int,
+                 concurrency: int,
+                 num_passes: int | None = None,
+                 adjust_dataset_size: bool = False):
         from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
         self.dataset_config = dataset_config
         self.dataset_filter = DatasetFilter(dataset_config.filter)
         self.reps = reps
+        # number of passes at specific concurrency
+        self.concurrency = concurrency
+        self.num_passes = num_passes
+        self.adjust_dataset_size = adjust_dataset_size
         # Helpers
         self.intermediate_step_adapter = IntermediateStepAdapter()
@@ -81,6 +93,7 @@ class DatasetHandler:
                 output_obj=row.get(self.generated_answer_key, "") if structured else "",
                 trajectory=row.get(self.trajectory_key, []) if structured else [],
                 expected_trajectory=row.get(self.expected_trajectory_key, []) if structured else [],
+                full_dataset_entry=row.to_dict(),
             )
         # if input dataframe is empty return an empty list
@@ -108,6 +121,63 @@ class DatasetHandler:
         return input_df
+    def adjust_dataset(self, input_df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Adjust the dataset so its length is a multiple of concurrency.
+        If num_passes > 0:
+            dataset size is adjusted to concurrency * num_passes
+        else:
+            dataset size is adjusted to the largest multiple of concurrency that is <= the
+            current dataset size, or to concurrency itself if the dataset is smaller than that
+        """
+        if self.concurrency <= 0:
+            raise ValueError("Concurrency must be > 0")
+        if self.num_passes < 0:
+            raise ValueError("num_passes must be >= 0")
+        original_size = input_df.shape[0]
+        # Calculate target size
+        if self.num_passes > 0:
+            # When num_passes is specified, always use concurrency * num_passes
+            # This respects the user's intent for exact number of passes
+            target_size = self.concurrency * self.num_passes
+        else:
+            # When num_passes = 0, use the largest multiple of concurrency <= original_size
+            # If original_size < concurrency, we need at least concurrency rows
+            if original_size >= self.concurrency:
+                target_size = (original_size // self.concurrency) * self.concurrency
+            else:
+                target_size = self.concurrency
+        if target_size == 0:
+            raise ValueError("Input dataset too small for even one batch at given concurrency.")
+        id_col = self.dataset_config.id_key
+        # If we need more rows than we have, replicate the dataset
+        if original_size < target_size:
+            # Clean existing _rep suffix if present
+            input_df[id_col] = input_df[id_col].astype(str).str.replace(r"_rep\d+$", "", regex=True)
+            # Calculate how many complete copies we need
+            copies_needed = math.ceil(target_size / original_size)
+            # Create the replicated dataframe
+            replicated_dfs = []
+            for i in range(copies_needed):
+                df_copy = input_df.copy()
+                if i > 0:  # Add suffix to all but the first copy
+                    df_copy[id_col] = df_copy[id_col].astype(str) + f"_rep{i}"
+                replicated_dfs.append(df_copy)
+            input_df = pd.concat(replicated_dfs, ignore_index=True)
+        # Return exactly the target size
+        return input_df.head(target_size)
     def get_eval_input_from_dataset(self, dataset: str) -> EvalInput:
         # read the dataset and convert it to EvalInput
@@ -126,9 +196,14 @@ class DatasetHandler:
         input_df = self.dataset_filter.apply_filters(input_df)
         input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
+        if self.reps > 1 and self.adjust_dataset_size:
+            raise ValueError("reps and adjust_dataset_size are mutually exclusive")
         # If more than one repetition is needed, replicate the rows
         if self.reps > 1:
             input_df = self.setup_reps(input_df)
+        elif self.adjust_dataset_size:
+            input_df = self.adjust_dataset(input_df)
         # Convert the DataFrame to a list of EvalInput objects
         return self.get_eval_input_from_df(input_df)
@@ -151,6 +226,16 @@ class DatasetHandler:
         allow re-running evaluation using the original config file and '--skip_workflow' option.
         """
+        def parse_if_json_string(value):
+            if isinstance(value, str):
+                try:
+                    return json.loads(value)
+                except json.JSONDecodeError:
+                    return value
+            if hasattr(value, "model_dump"):
+                return value.model_dump()
+            return value
         indent = 2
         if self.is_structured_input():
             # Extract structured data from EvalInputItems
@@ -164,6 +249,6 @@ class DatasetHandler:
             } for item in eval_input.eval_input_items]
         else:
             # Unstructured case: return only raw output objects as a JSON array
-            data = [json.loads(item.output_obj) for item in eval_input.eval_input_items]
+            data = [parse_if_json_string(item.output_obj) for item in eval_input.eval_input_items]
         return json.dumps(data, indent=indent, ensure_ascii=False, default=str)

aiqtoolkit 1.2.0.dev0__py3-none-any.whl → 1.2.0rc2__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.2.0.dev0py3-none-any.whl → 1.2.0rc2py3-none-any.whl