aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0a20250730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (197)
  1. aiq/agent/base.py +171 -8
  2. aiq/agent/dual_node.py +1 -1
  3. aiq/agent/react_agent/agent.py +113 -113
  4. aiq/agent/react_agent/register.py +31 -14
  5. aiq/agent/rewoo_agent/agent.py +36 -35
  6. aiq/agent/rewoo_agent/register.py +2 -2
  7. aiq/agent/tool_calling_agent/agent.py +3 -7
  8. aiq/authentication/__init__.py +14 -0
  9. aiq/authentication/api_key/__init__.py +14 -0
  10. aiq/authentication/api_key/api_key_auth_provider.py +92 -0
  11. aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
  12. aiq/authentication/api_key/register.py +26 -0
  13. aiq/authentication/exceptions/__init__.py +14 -0
  14. aiq/authentication/exceptions/api_key_exceptions.py +38 -0
  15. aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
  16. aiq/authentication/exceptions/call_back_exceptions.py +38 -0
  17. aiq/authentication/exceptions/request_exceptions.py +54 -0
  18. aiq/authentication/http_basic_auth/__init__.py +0 -0
  19. aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
  20. aiq/authentication/http_basic_auth/register.py +30 -0
  21. aiq/authentication/interfaces.py +93 -0
  22. aiq/authentication/oauth2/__init__.py +14 -0
  23. aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
  24. aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
  25. aiq/authentication/oauth2/register.py +25 -0
  26. aiq/authentication/register.py +21 -0
  27. aiq/builder/builder.py +64 -2
  28. aiq/builder/component_utils.py +16 -3
  29. aiq/builder/context.py +26 -0
  30. aiq/builder/eval_builder.py +43 -2
  31. aiq/builder/function.py +32 -4
  32. aiq/builder/function_base.py +1 -1
  33. aiq/builder/intermediate_step_manager.py +6 -8
  34. aiq/builder/user_interaction_manager.py +3 -0
  35. aiq/builder/workflow.py +23 -18
  36. aiq/builder/workflow_builder.py +420 -73
  37. aiq/cli/commands/info/list_mcp.py +103 -16
  38. aiq/cli/commands/sizing/__init__.py +14 -0
  39. aiq/cli/commands/sizing/calc.py +294 -0
  40. aiq/cli/commands/sizing/sizing.py +27 -0
  41. aiq/cli/commands/start.py +1 -0
  42. aiq/cli/entrypoint.py +2 -0
  43. aiq/cli/register_workflow.py +80 -0
  44. aiq/cli/type_registry.py +151 -30
  45. aiq/data_models/api_server.py +117 -11
  46. aiq/data_models/authentication.py +231 -0
  47. aiq/data_models/common.py +35 -7
  48. aiq/data_models/component.py +17 -9
  49. aiq/data_models/component_ref.py +33 -0
  50. aiq/data_models/config.py +60 -3
  51. aiq/data_models/embedder.py +1 -0
  52. aiq/data_models/function_dependencies.py +8 -0
  53. aiq/data_models/interactive.py +10 -1
  54. aiq/data_models/intermediate_step.py +15 -5
  55. aiq/data_models/its_strategy.py +30 -0
  56. aiq/data_models/llm.py +1 -0
  57. aiq/data_models/memory.py +1 -0
  58. aiq/data_models/object_store.py +44 -0
  59. aiq/data_models/retry_mixin.py +35 -0
  60. aiq/data_models/span.py +187 -0
  61. aiq/data_models/telemetry_exporter.py +2 -2
  62. aiq/embedder/nim_embedder.py +2 -1
  63. aiq/embedder/openai_embedder.py +2 -1
  64. aiq/eval/config.py +19 -1
  65. aiq/eval/dataset_handler/dataset_handler.py +75 -1
  66. aiq/eval/evaluate.py +53 -10
  67. aiq/eval/rag_evaluator/evaluate.py +23 -12
  68. aiq/eval/remote_workflow.py +7 -2
  69. aiq/eval/runners/__init__.py +14 -0
  70. aiq/eval/runners/config.py +39 -0
  71. aiq/eval/runners/multi_eval_runner.py +54 -0
  72. aiq/eval/usage_stats.py +6 -0
  73. aiq/eval/utils/weave_eval.py +5 -1
  74. aiq/experimental/__init__.py +0 -0
  75. aiq/experimental/decorators/__init__.py +0 -0
  76. aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
  77. aiq/experimental/inference_time_scaling/__init__.py +0 -0
  78. aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
  79. aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
  80. aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
  81. aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
  82. aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
  83. aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
  84. aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
  85. aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
  86. aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
  87. aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
  88. aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
  89. aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
  90. aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
  91. aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
  92. aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
  93. aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
  94. aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
  95. aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
  96. aiq/experimental/inference_time_scaling/register.py +36 -0
  97. aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
  98. aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
  99. aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
  100. aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
  101. aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
  102. aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
  103. aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
  104. aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
  105. aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
  106. aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
  107. aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
  108. aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
  109. aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
  110. aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
  111. aiq/front_ends/console/authentication_flow_handler.py +233 -0
  112. aiq/front_ends/console/console_front_end_plugin.py +11 -2
  113. aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
  114. aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
  115. aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
  116. aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
  117. aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
  118. aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
  119. aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
  120. aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
  121. aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
  122. aiq/front_ends/fastapi/main.py +2 -0
  123. aiq/front_ends/fastapi/message_handler.py +102 -84
  124. aiq/front_ends/fastapi/step_adaptor.py +2 -1
  125. aiq/llm/aws_bedrock_llm.py +2 -1
  126. aiq/llm/nim_llm.py +2 -1
  127. aiq/llm/openai_llm.py +2 -1
  128. aiq/object_store/__init__.py +20 -0
  129. aiq/object_store/in_memory_object_store.py +74 -0
  130. aiq/object_store/interfaces.py +84 -0
  131. aiq/object_store/models.py +36 -0
  132. aiq/object_store/register.py +20 -0
  133. aiq/observability/__init__.py +14 -0
  134. aiq/observability/exporter/__init__.py +14 -0
  135. aiq/observability/exporter/base_exporter.py +449 -0
  136. aiq/observability/exporter/exporter.py +78 -0
  137. aiq/observability/exporter/file_exporter.py +33 -0
  138. aiq/observability/exporter/processing_exporter.py +269 -0
  139. aiq/observability/exporter/raw_exporter.py +52 -0
  140. aiq/observability/exporter/span_exporter.py +264 -0
  141. aiq/observability/exporter_manager.py +335 -0
  142. aiq/observability/mixin/__init__.py +14 -0
  143. aiq/observability/mixin/batch_config_mixin.py +26 -0
  144. aiq/observability/mixin/collector_config_mixin.py +23 -0
  145. aiq/observability/mixin/file_mixin.py +288 -0
  146. aiq/observability/mixin/file_mode.py +23 -0
  147. aiq/observability/mixin/resource_conflict_mixin.py +134 -0
  148. aiq/observability/mixin/serialize_mixin.py +61 -0
  149. aiq/observability/mixin/type_introspection_mixin.py +183 -0
  150. aiq/observability/processor/__init__.py +14 -0
  151. aiq/observability/processor/batching_processor.py +316 -0
  152. aiq/observability/processor/intermediate_step_serializer.py +28 -0
  153. aiq/observability/processor/processor.py +68 -0
  154. aiq/observability/register.py +32 -116
  155. aiq/observability/utils/__init__.py +14 -0
  156. aiq/observability/utils/dict_utils.py +236 -0
  157. aiq/observability/utils/time_utils.py +31 -0
  158. aiq/profiler/calc/__init__.py +14 -0
  159. aiq/profiler/calc/calc_runner.py +623 -0
  160. aiq/profiler/calc/calculations.py +288 -0
  161. aiq/profiler/calc/data_models.py +176 -0
  162. aiq/profiler/calc/plot.py +345 -0
  163. aiq/profiler/data_models.py +2 -0
  164. aiq/profiler/profile_runner.py +16 -13
  165. aiq/runtime/loader.py +8 -2
  166. aiq/runtime/runner.py +23 -9
  167. aiq/runtime/session.py +16 -5
  168. aiq/tool/chat_completion.py +74 -0
  169. aiq/tool/code_execution/README.md +152 -0
  170. aiq/tool/code_execution/code_sandbox.py +151 -72
  171. aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
  172. aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
  173. aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
  174. aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
  175. aiq/tool/code_execution/register.py +7 -3
  176. aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
  177. aiq/tool/mcp/exceptions.py +142 -0
  178. aiq/tool/mcp/mcp_client.py +17 -3
  179. aiq/tool/mcp/mcp_tool.py +1 -1
  180. aiq/tool/register.py +1 -0
  181. aiq/tool/server_tools.py +2 -2
  182. aiq/utils/exception_handlers/automatic_retries.py +289 -0
  183. aiq/utils/exception_handlers/mcp.py +211 -0
  184. aiq/utils/io/model_processing.py +28 -0
  185. aiq/utils/log_utils.py +37 -0
  186. aiq/utils/string_utils.py +38 -0
  187. aiq/utils/type_converter.py +18 -2
  188. aiq/utils/type_utils.py +87 -0
  189. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/METADATA +37 -9
  190. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/RECORD +195 -80
  191. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/entry_points.txt +3 -0
  192. aiq/front_ends/fastapi/websocket.py +0 -153
  193. aiq/observability/async_otel_listener.py +0 -470
  194. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/WHEEL +0 -0
  195. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
  196. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/licenses/LICENSE.md +0 -0
  197. {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0a20250730.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,30 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import typing
17
+
18
+ from .common import BaseModelRegistryTag
19
+ from .common import TypedBaseModel
20
+
21
+
22
class ITSStrategyBaseConfig(TypedBaseModel, BaseModelRegistryTag):
    """
    Base configuration class for Inference Time Scaling (ITS) strategies.

    This class defines the common structure that concrete ITS strategy
    configurations subclass; it carries no fields of its own.
    """
    pass


# Type variable bound to ITSStrategyBaseConfig, for APIs that are generic over
# a concrete ITS strategy configuration type.
ITSStrategyBaseConfigT = typing.TypeVar("ITSStrategyBaseConfigT", bound=ITSStrategyBaseConfig)
aiq/data_models/llm.py CHANGED
@@ -20,6 +20,7 @@ from .common import TypedBaseModel
20
20
 
21
21
 
22
22
class LLMBaseConfig(TypedBaseModel, BaseModelRegistryTag):
    """Base configuration class for LLM providers; carries no fields of its own."""
    pass
24
25
 
25
26
 
aiq/data_models/memory.py CHANGED
@@ -20,6 +20,7 @@ from .common import TypedBaseModel
20
20
 
21
21
 
22
22
class MemoryBaseConfig(TypedBaseModel, BaseModelRegistryTag):
    """The base-level config object for a memory object.

    Memories provide an interface for storing and retrieving data; concrete
    memory configurations subclass this base.
    """
    pass
24
25
 
25
26
 
@@ -0,0 +1,44 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import typing
17
+
18
+ from .common import BaseModelRegistryTag
19
+ from .common import TypedBaseModel
20
+
21
+
22
class ObjectStoreBaseConfig(TypedBaseModel, BaseModelRegistryTag):
    """Base configuration class for object store providers; carries no fields of its own."""
    pass


# Type variable bound to ObjectStoreBaseConfig, for APIs that are generic over
# a concrete object store configuration type.
ObjectStoreBaseConfigT = typing.TypeVar("ObjectStoreBaseConfigT", bound=ObjectStoreBaseConfig)
27
+
28
+
29
+ class KeyAlreadyExistsError(Exception):
30
+
31
+ def __init__(self, key: str, additional_message: str | None = None):
32
+ parts = [f"Key already exists: {key}."]
33
+ if additional_message:
34
+ parts.append(additional_message)
35
+ super().__init__(" ".join(parts))
36
+
37
+
38
+ class NoSuchKeyError(Exception):
39
+
40
+ def __init__(self, key: str, additional_message: str | None = None):
41
+ parts = [f"No object found with key: {key}."]
42
+ if additional_message:
43
+ parts.append(additional_message)
44
+ super().__init__(" ".join(parts))
@@ -0,0 +1,35 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from pydantic import BaseModel
17
+ from pydantic import Field
18
+
19
+
20
class RetryMixin(BaseModel):
    """Mixin class for retry configuration.

    All fields are declared with ``exclude=True``, so they are omitted when the
    owning model is serialized (pydantic documented behavior of ``exclude``).
    """
    # Master switch: whether retries are attempted at all.
    do_auto_retry: bool = Field(default=True,
                                description="Whether to automatically retry method calls"
                                " that fail with a retryable error.",
                                exclude=True)
    # Maximum number of retry attempts for a failing call.
    num_retries: int = Field(default=5,
                             description="Number of times to retry a method call that fails"
                             " with a retryable error.",
                             exclude=True)
    # HTTP status codes treated as retryable; int or string form is accepted.
    retry_on_status_codes: list[int | str] = Field(default_factory=lambda: [429, 500, 502, 503, 504],
                                                   description="List of HTTP status codes that should trigger a retry.",
                                                   exclude=True)
    # Error-message substrings treated as retryable.
    retry_on_errors: list[str] | None = Field(default_factory=lambda: ["Too Many Requests"],
                                              description="List of error substrings that should trigger a retry.",
                                              exclude=True)
@@ -0,0 +1,187 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+ import time
18
+ import uuid
19
+ from enum import Enum
20
+ from typing import Any
21
+
22
+ from pydantic import BaseModel
23
+ from pydantic import Field
24
+ from pydantic import field_validator
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
class SpanKind(Enum):
    """Semantic category assigned to a span."""
    LLM = "LLM"
    TOOL = "TOOL"
    WORKFLOW = "WORKFLOW"
    TASK = "TASK"
    FUNCTION = "FUNCTION"
    CUSTOM = "CUSTOM"
    SPAN = "SPAN"
    EMBEDDER = "EMBEDDER"
    RETRIEVER = "RETRIEVER"
    AGENT = "AGENT"
    RERANKER = "RERANKER"
    GUARDRAIL = "GUARDRAIL"
    EVALUATOR = "EVALUATOR"
    UNKNOWN = "UNKNOWN"


# Every kind except UNKNOWN is reachable from its "<KIND>_START" and
# "<KIND>_END" event types; LLM is additionally reachable from the streaming
# "LLM_NEW_TOKEN" event.
EVENT_TYPE_TO_SPAN_KIND_MAP: dict[str, SpanKind] = {
    f"{kind.name}_{suffix}": kind
    for kind in SpanKind
    if kind is not SpanKind.UNKNOWN
    for suffix in ("START", "END")
}
EVENT_TYPE_TO_SPAN_KIND_MAP["LLM_NEW_TOKEN"] = SpanKind.LLM


def event_type_to_span_kind(event_type: str) -> SpanKind:
    """Convert an event type to a span kind.

    Args:
        event_type (str): The event type to convert.

    Returns:
        SpanKind: The matching span kind, or ``SpanKind.UNKNOWN`` when the
        event type is not recognized.
    """
    return EVENT_TYPE_TO_SPAN_KIND_MAP.get(event_type, SpanKind.UNKNOWN)
87
+
88
+
89
class SpanAttributes(Enum):
    """Canonical attribute key strings attached to spans."""
    AIQ_SPAN_KIND = "aiq.span.kind"
    # Generic input payload attributes.
    INPUT_VALUE = "input.value"
    INPUT_MIME_TYPE = "input.mime_type"
    # Per-call LLM token counts.
    LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
    LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
    LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
    # Generic output payload attributes.
    OUTPUT_VALUE = "output.value"
    OUTPUT_MIME_TYPE = "output.mime_type"
    # Aggregated aiq usage statistics.
    AIQ_USAGE_NUM_LLM_CALLS = "aiq.usage.num_llm_calls"
    AIQ_USAGE_SECONDS_BETWEEN_CALLS = "aiq.usage.seconds_between_calls"
    AIQ_USAGE_TOKEN_COUNT_PROMPT = "aiq.usage.token_count.prompt"
    AIQ_USAGE_TOKEN_COUNT_COMPLETION = "aiq.usage.token_count.completion"
    AIQ_USAGE_TOKEN_COUNT_TOTAL = "aiq.usage.token_count.total"
    AIQ_EVENT_TYPE = "aiq.event_type"
104
+
105
+
106
class MimeTypes(Enum):
    """MIME type strings used for span input/output payloads."""
    TEXT = "text/plain"
    JSON = "application/json"
109
+
110
+
111
class SpanStatusCode(Enum):
    """Outcome status of a span."""
    OK = "OK"
    ERROR = "ERROR"
    UNSET = "UNSET"
115
+
116
+
117
class SpanEvent(BaseModel):
    """A timestamped, named event recorded on a span."""
    # Nanoseconds since the Unix epoch (time.time() * 1e9). NOTE(review): the
    # annotation is float but the default factory produces an int — confirm
    # whether the float annotation is intentional.
    timestamp: float = Field(default_factory=lambda: int(time.time() * 1e9), description="The timestamp of the event.")
    name: str = Field(description="The name of the event.")
    attributes: dict[str, Any] = Field(default_factory=dict, description="The attributes of the event.")
121
+
122
+
123
class SpanStatus(BaseModel):
    """Completion status of a span: a status code with an optional message."""
    code: SpanStatusCode = Field(default=SpanStatusCode.OK, description="The status code of the span.")
    message: str | None = Field(default=None, description="The status message of the span.")
126
+
127
+
128
class SpanContext(BaseModel):
    """Trace and span identifiers for a span.

    Defaults derive from ``uuid.uuid4()``: the full 128-bit UUID integer for
    the trace ID, and its low 64 bits for the span ID.
    """
    trace_id: int = Field(default_factory=lambda: uuid.uuid4().int, description="The 128-bit trace ID of the span.")
    span_id: int = Field(default_factory=lambda: uuid.uuid4().int & ((1 << 64) - 1),
                         description="The 64-bit span ID of the span.")
132
+
133
+
134
class Span(BaseModel):
    """A single traced operation: name, parent linkage, timing, attributes,
    recorded events and completion status."""

    name: str = Field(description="The name of the span.")
    context: SpanContext | None = Field(default=None, description="The context of the span.")
    parent: "Span | None" = Field(default=None, description="The parent span of the span.")
    start_time: int = Field(default_factory=lambda: int(time.time() * 1e9), description="The start time of the span.")
    end_time: int | None = Field(default=None, description="The end time of the span.")
    attributes: dict[str, Any] = Field(default_factory=dict, description="The attributes of the span.")
    events: list[SpanEvent] = Field(default_factory=list, description="The events of the span.")
    status: SpanStatus = Field(default_factory=SpanStatus, description="The status of the span.")

    @field_validator('context', mode='before')
    @classmethod
    def set_default_context(cls, v: SpanContext | None) -> SpanContext:
        """Supply a freshly generated SpanContext when none is provided.

        Args:
            v (SpanContext | None): The context passed in by the caller.

        Returns:
            SpanContext: The caller's context, or a new one when it was None.
        """
        return SpanContext() if v is None else v

    def set_attribute(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key`` in the span's attribute mapping.

        Args:
            key (str): The key of the attribute.
            value (Any): The value of the attribute.
        """
        self.attributes[key] = value

    def add_event(self, name: str, attributes: dict[str, Any] | None = None) -> None:
        """Record a named event on the span.

        Args:
            name (str): The name of the event.
            attributes (dict[str, Any] | None): Optional event attributes.
        """
        event_attributes = attributes if attributes is not None else {}
        # Rebind to a fresh list instead of mutating in place, so any
        # previously captured reference to the old list is left untouched.
        self.events = [*self.events, SpanEvent(name=name, attributes=event_attributes)]

    def end(self, end_time: int | None = None) -> None:
        """Close the span, stamping the current wall-clock time in nanoseconds
        when no explicit end time is given.

        Args:
            end_time (int | None): The end time of the span, in nanoseconds
                since the Unix epoch.
        """
        self.end_time = end_time if end_time is not None else int(time.time() * 1e9)
@@ -15,8 +15,8 @@
15
15
 
16
16
  import typing
17
17
 
18
- from .common import BaseModelRegistryTag
19
- from .common import TypedBaseModel
18
+ from aiq.data_models.common import BaseModelRegistryTag
19
+ from aiq.data_models.common import TypedBaseModel
20
20
 
21
21
 
22
22
  class TelemetryExporterBaseConfig(TypedBaseModel, BaseModelRegistryTag):
@@ -24,6 +24,7 @@ from aiq.builder.builder import Builder
24
24
  from aiq.builder.embedder import EmbedderProviderInfo
25
25
  from aiq.cli.register_workflow import register_embedder_provider
26
26
  from aiq.data_models.embedder import EmbedderBaseConfig
27
+ from aiq.data_models.retry_mixin import RetryMixin
27
28
 
28
29
  allowed_truncate_values = ["NONE", "START", "END"]
29
30
 
@@ -37,7 +38,7 @@ def option_in_allowed_values(v):
37
38
  TruncationOption = typing.Annotated[str, AfterValidator(option_in_allowed_values)]
38
39
 
39
40
 
40
- class NIMEmbedderModelConfig(EmbedderBaseConfig, name="nim"):
41
+ class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
41
42
  """A NVIDIA Inference Microservice (NIM) embedder provider to be used with an embedder client."""
42
43
 
43
44
  api_key: str | None = Field(default=None, description="NVIDIA API key to interact with hosted NIM.")
@@ -21,9 +21,10 @@ from aiq.builder.builder import Builder
21
21
  from aiq.builder.embedder import EmbedderProviderInfo
22
22
  from aiq.cli.register_workflow import register_embedder_provider
23
23
  from aiq.data_models.embedder import EmbedderBaseConfig
24
+ from aiq.data_models.retry_mixin import RetryMixin
24
25
 
25
26
 
26
- class OpenAIEmbedderModelConfig(EmbedderBaseConfig, name="openai"):
27
+ class OpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="openai"):
27
28
  """An OpenAI LLM provider to be used with an LLM client."""
28
29
 
29
30
  model_config = ConfigDict(protected_namespaces=())
aiq/eval/config.py CHANGED
@@ -17,13 +17,18 @@ from pathlib import Path
17
17
 
18
18
  from pydantic import BaseModel
19
19
 
20
+ from aiq.eval.evaluator.evaluator_model import EvalInput
21
+ from aiq.eval.evaluator.evaluator_model import EvalOutput
22
+ from aiq.eval.usage_stats import UsageStats
23
+ from aiq.profiler.data_models import ProfilerResults
24
+
20
25
 
21
26
  class EvaluationRunConfig(BaseModel):
22
27
  """
23
28
  Parameters used for a single evaluation run.
24
29
  """
25
30
  config_file: Path
26
- dataset: str | None # dataset file path can be specified in the config file
31
+ dataset: str | None = None # dataset file path can be specified in the config file
27
32
  result_json_path: str = "$"
28
33
  skip_workflow: bool = False
29
34
  skip_completed_entries: bool = False
@@ -31,6 +36,14 @@ class EvaluationRunConfig(BaseModel):
31
36
  endpoint_timeout: int = 300
32
37
  reps: int = 1
33
38
  override: tuple[tuple[str, str], ...] = ()
39
+ # If false, the output will not be written to the output directory. This is
40
+ # useful when running evaluation via another tool.
41
+ write_output: bool = True
42
+ # if true, the dataset is adjusted to a multiple of the concurrency
43
+ adjust_dataset_size: bool = False
44
+ # number of passes at each concurrency, if 0 the dataset is adjusted to a multiple of the
45
+ # concurrency. This is only used if adjust_dataset_size is true.
46
+ num_passes: int = 0
34
47
 
35
48
 
36
49
  class EvaluationRunOutput(BaseModel):
@@ -40,3 +53,8 @@ class EvaluationRunOutput(BaseModel):
40
53
  workflow_output_file: Path | None
41
54
  evaluator_output_files: list[Path]
42
55
  workflow_interrupted: bool
56
+
57
+ eval_input: EvalInput
58
+ evaluation_results: list[tuple[str, EvalOutput]]
59
+ usage_stats: UsageStats | None = None
60
+ profiler_results: ProfilerResults
@@ -14,6 +14,7 @@
14
14
  # limitations under the License.
15
15
 
16
16
  import json
17
+ import math
17
18
 
18
19
  import pandas as pd
19
20
 
@@ -33,12 +34,23 @@ class DatasetHandler:
33
34
  One DatasetHandler object is needed for each dataset to be evaluated.
34
35
  """
35
36
 
36
- def __init__(self, dataset_config: EvalDatasetConfig, reps: int):
37
    def __init__(self,
                 dataset_config: EvalDatasetConfig,
                 reps: int,
                 concurrency: int,
                 num_passes: int | None = None,
                 adjust_dataset_size: bool = False):
        """Build a handler for a single evaluation dataset.

        Args:
            dataset_config: Dataset configuration (source, id key, filters).
            reps: Number of repetitions of the dataset to evaluate.
            concurrency: Evaluation concurrency used when sizing the dataset.
            num_passes: Number of passes at the given concurrency; when 0 (or
                None) the dataset is instead trimmed to a multiple of the
                concurrency.
            adjust_dataset_size: When True, resize the dataset based on
                ``concurrency``/``num_passes``; mutually exclusive with
                ``reps > 1``.
        """
        # NOTE(review): imported locally — presumably to avoid an import cycle
        # at module load time; confirm.
        from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

        self.dataset_config = dataset_config
        self.dataset_filter = DatasetFilter(dataset_config.filter)
        self.reps = reps

        # number of passes at specific concurrency
        self.concurrency = concurrency
        self.num_passes = num_passes
        self.adjust_dataset_size = adjust_dataset_size

        # Helpers
        self.intermediate_step_adapter = IntermediateStepAdapter()
44
56
 
@@ -109,6 +121,63 @@ class DatasetHandler:
109
121
 
110
122
  return input_df
111
123
 
124
+ def adjust_dataset(self, input_df: pd.DataFrame) -> pd.DataFrame:
125
+ """
126
+ Adjust the dataset so its length is a multiple of concurrency.
127
+
128
+ If num_passes > 0:
129
+ dataset size is adjusted to concurrency * num_passes
130
+ else:
131
+ dataset size is adjusted to the largest multiple of concurrency
132
+ that is less than or equal to the current dataset size
133
+ """
134
+ if self.concurrency <= 0:
135
+ raise ValueError("Concurrency must be > 0")
136
+
137
+ if self.num_passes < 0:
138
+ raise ValueError("num_passes must be >= 0")
139
+
140
+ original_size = input_df.shape[0]
141
+
142
+ # Calculate target size
143
+ if self.num_passes > 0:
144
+ # When num_passes is specified, always use concurrency * num_passes
145
+ # This respects the user's intent for exact number of passes
146
+ target_size = self.concurrency * self.num_passes
147
+ else:
148
+ # When num_passes = 0, use the largest multiple of concurrency <= original_size
149
+ # If original_size < concurrency, we need at least concurrency rows
150
+ if original_size >= self.concurrency:
151
+ target_size = (original_size // self.concurrency) * self.concurrency
152
+ else:
153
+ target_size = self.concurrency
154
+
155
+ if target_size == 0:
156
+ raise ValueError("Input dataset too small for even one batch at given concurrency.")
157
+
158
+ id_col = self.dataset_config.id_key
159
+
160
+ # If we need more rows than we have, replicate the dataset
161
+ if original_size < target_size:
162
+ # Clean existing _rep suffix if present
163
+ input_df[id_col] = input_df[id_col].astype(str).str.replace(r"_rep\d+$", "", regex=True)
164
+
165
+ # Calculate how many complete copies we need
166
+ copies_needed = math.ceil(target_size / original_size)
167
+
168
+ # Create the replicated dataframe
169
+ replicated_dfs = []
170
+ for i in range(copies_needed):
171
+ df_copy = input_df.copy()
172
+ if i > 0: # Add suffix to all but the first copy
173
+ df_copy[id_col] = df_copy[id_col].astype(str) + f"_rep{i}"
174
+ replicated_dfs.append(df_copy)
175
+
176
+ input_df = pd.concat(replicated_dfs, ignore_index=True)
177
+
178
+ # Return exactly the target size
179
+ return input_df.head(target_size)
180
+
112
181
  def get_eval_input_from_dataset(self, dataset: str) -> EvalInput:
113
182
  # read the dataset and convert it to EvalInput
114
183
 
@@ -127,9 +196,14 @@ class DatasetHandler:
127
196
  input_df = self.dataset_filter.apply_filters(input_df)
128
197
  input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
129
198
 
199
+ if self.reps > 1 and self.adjust_dataset_size:
200
+ raise ValueError("reps and adjust_dataset_size are mutually exclusive")
201
+
130
202
  # If more than one repetition is needed, replicate the rows
131
203
  if self.reps > 1:
132
204
  input_df = self.setup_reps(input_df)
205
+ elif self.adjust_dataset_size:
206
+ input_df = self.adjust_dataset(input_df)
133
207
 
134
208
  # Convert the DataFrame to a list of EvalInput objects
135
209
  return self.get_eval_input_from_df(input_df)
aiq/eval/evaluate.py CHANGED
@@ -99,12 +99,34 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
99
99
  max_timestamp = max(step.event_timestamp for step in item.trajectory)
100
100
  runtime = max_timestamp - min_timestamp
101
101
  else:
102
+ min_timestamp = 0.0
103
+ max_timestamp = 0.0
102
104
  runtime = 0.0
103
105
 
106
+ # find llm latency by calculating p95 of all llm calls
107
+ llm_latencies = []
108
+ previous_llm_start_time = None
109
+ for step in steps:
110
+ if step.event_type == "LLM_START":
111
+ previous_llm_start_time = step.event_timestamp
112
+ elif step.event_type == "LLM_END" and previous_llm_start_time is not None:
113
+ llm_latencies.append(step.event_timestamp - previous_llm_start_time)
114
+ previous_llm_start_time = None
115
+
116
+ # Calculate p95 LLM latency (or 0 if no LLM calls)
117
+ if llm_latencies:
118
+ import numpy as np
119
+ llm_latency = float(np.percentile(llm_latencies, 95))
120
+ else:
121
+ llm_latency = 0.0
122
+
104
123
  # add the usage stats to the usage stats dict
105
124
  self.usage_stats.usage_stats_items[item.id] = UsageStatsItem(usage_stats_per_llm=usage_stats_per_llm,
106
125
  runtime=runtime,
107
- total_tokens=total_tokens)
126
+ total_tokens=total_tokens,
127
+ min_timestamp=min_timestamp,
128
+ max_timestamp=max_timestamp,
129
+ llm_latency=llm_latency)
108
130
  return self.usage_stats.usage_stats_items[item.id]
109
131
 
110
132
  async def run_workflow_local(self, session_manager: AIQSessionManager):
@@ -221,7 +243,9 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
221
243
  for input_item in self.eval_input.eval_input_items:
222
244
  all_stats.append(input_item.trajectory)
223
245
 
224
- profiler_runner = ProfilerRunner(self.eval_config.general.profiler, self.eval_config.general.output_dir)
246
+ profiler_runner = ProfilerRunner(self.eval_config.general.profiler,
247
+ self.eval_config.general.output_dir,
248
+ write_output=self.config.write_output)
225
249
 
226
250
  return await profiler_runner.run(all_stats)
227
251
 
@@ -308,6 +332,11 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
308
332
  self.evaluator_output_files.append(output_file)
309
333
  logger.info("Evaluation results written to %s", output_file)
310
334
 
335
+ def publish_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
336
+ """Publish the output"""
337
+ if self.config.write_output:
338
+ self.write_output(dataset_handler, profiler_results)
339
+
311
340
  if self.workflow_interrupted:
312
341
  # Issue a warning if the workflow was not completed on all datasets
313
342
  msg = ("Workflow execution was interrupted due to an error. The results may be incomplete. "
@@ -415,7 +444,11 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
415
444
  workflow_interrupted=self.workflow_interrupted,
416
445
  )
417
446
 
418
- dataset_handler = DatasetHandler(dataset_config=dataset_config, reps=self.config.reps)
447
+ dataset_handler = DatasetHandler(dataset_config=dataset_config,
448
+ reps=self.config.reps,
449
+ concurrency=self.eval_config.general.max_concurrency,
450
+ num_passes=self.config.num_passes,
451
+ adjust_dataset_size=self.config.adjust_dataset_size)
419
452
  self.eval_input = dataset_handler.get_eval_input_from_dataset(self.config.dataset)
420
453
  if not self.eval_input.eval_input_items:
421
454
  logger.info("Dataset is empty. Nothing to evaluate.")
@@ -447,8 +480,16 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
447
480
  # Profile the workflow
448
481
  profiler_results = await self.profile_workflow()
449
482
 
450
- # Write the results to the output directory
451
- self.write_output(dataset_handler, profiler_results)
483
+ # compute total runtime
484
+ if self.usage_stats.usage_stats_items:
485
+ self.usage_stats.total_runtime = max(self.usage_stats.usage_stats_items.values(),
486
+ key=lambda x: x.max_timestamp).max_timestamp - \
487
+ min(self.usage_stats.usage_stats_items.values(), key=lambda x: x.min_timestamp).min_timestamp
488
+ else:
489
+ self.usage_stats.total_runtime = 0.0
490
+
491
+ # Publish the results
492
+ self.publish_output(dataset_handler, profiler_results)
452
493
 
453
494
  # Run custom scripts and upload evaluation outputs to S3
454
495
  if self.eval_config.general.output:
@@ -456,8 +497,10 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
456
497
  output_uploader.run_custom_scripts()
457
498
  await output_uploader.upload_directory()
458
499
 
459
- return EvaluationRunOutput(
460
- workflow_output_file=self.workflow_output_file,
461
- evaluator_output_files=self.evaluator_output_files,
462
- workflow_interrupted=self.workflow_interrupted,
463
- )
500
+ return EvaluationRunOutput(workflow_output_file=self.workflow_output_file,
501
+ evaluator_output_files=self.evaluator_output_files,
502
+ workflow_interrupted=self.workflow_interrupted,
503
+ eval_input=self.eval_input,
504
+ evaluation_results=self.evaluation_results,
505
+ usage_stats=self.usage_stats,
506
+ profiler_results=profiler_results)