aiqtoolkit 1.2.0a20250707__py3-none-any.whl → 1.2.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiqtoolkit might be problematic. Click here for more details.
- aiq/agent/base.py +170 -8
- aiq/agent/dual_node.py +1 -1
- aiq/agent/react_agent/agent.py +112 -111
- aiq/agent/react_agent/register.py +31 -14
- aiq/agent/rewoo_agent/agent.py +36 -35
- aiq/agent/rewoo_agent/register.py +2 -2
- aiq/agent/tool_calling_agent/agent.py +3 -7
- aiq/authentication/__init__.py +14 -0
- aiq/authentication/api_key/__init__.py +14 -0
- aiq/authentication/api_key/api_key_auth_provider.py +92 -0
- aiq/authentication/api_key/api_key_auth_provider_config.py +124 -0
- aiq/authentication/api_key/register.py +26 -0
- aiq/authentication/exceptions/__init__.py +14 -0
- aiq/authentication/exceptions/api_key_exceptions.py +38 -0
- aiq/authentication/exceptions/auth_code_grant_exceptions.py +86 -0
- aiq/authentication/exceptions/call_back_exceptions.py +38 -0
- aiq/authentication/exceptions/request_exceptions.py +54 -0
- aiq/authentication/http_basic_auth/__init__.py +0 -0
- aiq/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- aiq/authentication/http_basic_auth/register.py +30 -0
- aiq/authentication/interfaces.py +93 -0
- aiq/authentication/oauth2/__init__.py +14 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- aiq/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- aiq/authentication/oauth2/register.py +25 -0
- aiq/authentication/register.py +21 -0
- aiq/builder/builder.py +64 -2
- aiq/builder/component_utils.py +16 -3
- aiq/builder/context.py +26 -0
- aiq/builder/eval_builder.py +43 -2
- aiq/builder/function.py +32 -4
- aiq/builder/function_base.py +1 -1
- aiq/builder/intermediate_step_manager.py +6 -8
- aiq/builder/user_interaction_manager.py +3 -0
- aiq/builder/workflow.py +23 -18
- aiq/builder/workflow_builder.py +420 -73
- aiq/cli/commands/info/list_mcp.py +103 -16
- aiq/cli/commands/sizing/__init__.py +14 -0
- aiq/cli/commands/sizing/calc.py +294 -0
- aiq/cli/commands/sizing/sizing.py +27 -0
- aiq/cli/commands/start.py +1 -0
- aiq/cli/entrypoint.py +2 -0
- aiq/cli/register_workflow.py +80 -0
- aiq/cli/type_registry.py +151 -30
- aiq/data_models/api_server.py +123 -11
- aiq/data_models/authentication.py +231 -0
- aiq/data_models/common.py +35 -7
- aiq/data_models/component.py +17 -9
- aiq/data_models/component_ref.py +33 -0
- aiq/data_models/config.py +60 -3
- aiq/data_models/embedder.py +1 -0
- aiq/data_models/function_dependencies.py +8 -0
- aiq/data_models/interactive.py +10 -1
- aiq/data_models/intermediate_step.py +15 -5
- aiq/data_models/its_strategy.py +30 -0
- aiq/data_models/llm.py +1 -0
- aiq/data_models/memory.py +1 -0
- aiq/data_models/object_store.py +44 -0
- aiq/data_models/retry_mixin.py +35 -0
- aiq/data_models/span.py +187 -0
- aiq/data_models/telemetry_exporter.py +2 -2
- aiq/embedder/nim_embedder.py +2 -1
- aiq/embedder/openai_embedder.py +2 -1
- aiq/eval/config.py +19 -1
- aiq/eval/dataset_handler/dataset_handler.py +75 -1
- aiq/eval/evaluate.py +53 -10
- aiq/eval/rag_evaluator/evaluate.py +23 -12
- aiq/eval/remote_workflow.py +7 -2
- aiq/eval/runners/__init__.py +14 -0
- aiq/eval/runners/config.py +39 -0
- aiq/eval/runners/multi_eval_runner.py +54 -0
- aiq/eval/usage_stats.py +6 -0
- aiq/eval/utils/weave_eval.py +5 -1
- aiq/experimental/__init__.py +0 -0
- aiq/experimental/decorators/__init__.py +0 -0
- aiq/experimental/decorators/experimental_warning_decorator.py +130 -0
- aiq/experimental/inference_time_scaling/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/editing/iterative_plan_refinement_editor.py +147 -0
- aiq/experimental/inference_time_scaling/editing/llm_as_a_judge_editor.py +204 -0
- aiq/experimental/inference_time_scaling/editing/motivation_aware_summarization.py +107 -0
- aiq/experimental/inference_time_scaling/functions/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/functions/execute_score_select_function.py +105 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_orchestration_function.py +205 -0
- aiq/experimental/inference_time_scaling/functions/its_tool_wrapper_function.py +146 -0
- aiq/experimental/inference_time_scaling/functions/plan_select_execute_function.py +224 -0
- aiq/experimental/inference_time_scaling/models/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/models/editor_config.py +132 -0
- aiq/experimental/inference_time_scaling/models/its_item.py +48 -0
- aiq/experimental/inference_time_scaling/models/scoring_config.py +112 -0
- aiq/experimental/inference_time_scaling/models/search_config.py +120 -0
- aiq/experimental/inference_time_scaling/models/selection_config.py +154 -0
- aiq/experimental/inference_time_scaling/models/stage_enums.py +43 -0
- aiq/experimental/inference_time_scaling/models/strategy_base.py +66 -0
- aiq/experimental/inference_time_scaling/models/tool_use_config.py +41 -0
- aiq/experimental/inference_time_scaling/register.py +36 -0
- aiq/experimental/inference_time_scaling/scoring/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_agent_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/llm_based_plan_scorer.py +168 -0
- aiq/experimental/inference_time_scaling/scoring/motivation_aware_scorer.py +111 -0
- aiq/experimental/inference_time_scaling/search/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/search/multi_llm_planner.py +128 -0
- aiq/experimental/inference_time_scaling/search/multi_query_retrieval_search.py +122 -0
- aiq/experimental/inference_time_scaling/search/single_shot_multi_plan_planner.py +128 -0
- aiq/experimental/inference_time_scaling/selection/__init__.py +0 -0
- aiq/experimental/inference_time_scaling/selection/best_of_n_selector.py +63 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_agent_output_selector.py +131 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_output_merging_selector.py +159 -0
- aiq/experimental/inference_time_scaling/selection/llm_based_plan_selector.py +128 -0
- aiq/experimental/inference_time_scaling/selection/threshold_selector.py +58 -0
- aiq/front_ends/console/authentication_flow_handler.py +233 -0
- aiq/front_ends/console/console_front_end_plugin.py +11 -2
- aiq/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- aiq/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- aiq/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- aiq/front_ends/fastapi/fastapi_front_end_config.py +20 -0
- aiq/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- aiq/front_ends/fastapi/fastapi_front_end_plugin.py +14 -1
- aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +353 -31
- aiq/front_ends/fastapi/html_snippets/__init__.py +14 -0
- aiq/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- aiq/front_ends/fastapi/main.py +2 -0
- aiq/front_ends/fastapi/message_handler.py +102 -84
- aiq/front_ends/fastapi/step_adaptor.py +2 -1
- aiq/llm/aws_bedrock_llm.py +2 -1
- aiq/llm/nim_llm.py +2 -1
- aiq/llm/openai_llm.py +2 -1
- aiq/object_store/__init__.py +20 -0
- aiq/object_store/in_memory_object_store.py +74 -0
- aiq/object_store/interfaces.py +84 -0
- aiq/object_store/models.py +36 -0
- aiq/object_store/register.py +20 -0
- aiq/observability/__init__.py +14 -0
- aiq/observability/exporter/__init__.py +14 -0
- aiq/observability/exporter/base_exporter.py +449 -0
- aiq/observability/exporter/exporter.py +78 -0
- aiq/observability/exporter/file_exporter.py +33 -0
- aiq/observability/exporter/processing_exporter.py +269 -0
- aiq/observability/exporter/raw_exporter.py +52 -0
- aiq/observability/exporter/span_exporter.py +264 -0
- aiq/observability/exporter_manager.py +335 -0
- aiq/observability/mixin/__init__.py +14 -0
- aiq/observability/mixin/batch_config_mixin.py +26 -0
- aiq/observability/mixin/collector_config_mixin.py +23 -0
- aiq/observability/mixin/file_mixin.py +288 -0
- aiq/observability/mixin/file_mode.py +23 -0
- aiq/observability/mixin/resource_conflict_mixin.py +134 -0
- aiq/observability/mixin/serialize_mixin.py +61 -0
- aiq/observability/mixin/type_introspection_mixin.py +183 -0
- aiq/observability/processor/__init__.py +14 -0
- aiq/observability/processor/batching_processor.py +316 -0
- aiq/observability/processor/intermediate_step_serializer.py +28 -0
- aiq/observability/processor/processor.py +68 -0
- aiq/observability/register.py +32 -116
- aiq/observability/utils/__init__.py +14 -0
- aiq/observability/utils/dict_utils.py +236 -0
- aiq/observability/utils/time_utils.py +31 -0
- aiq/profiler/calc/__init__.py +14 -0
- aiq/profiler/calc/calc_runner.py +623 -0
- aiq/profiler/calc/calculations.py +288 -0
- aiq/profiler/calc/data_models.py +176 -0
- aiq/profiler/calc/plot.py +345 -0
- aiq/profiler/data_models.py +2 -0
- aiq/profiler/profile_runner.py +16 -13
- aiq/runtime/loader.py +8 -2
- aiq/runtime/runner.py +23 -9
- aiq/runtime/session.py +16 -5
- aiq/tool/chat_completion.py +74 -0
- aiq/tool/code_execution/README.md +152 -0
- aiq/tool/code_execution/code_sandbox.py +151 -72
- aiq/tool/code_execution/local_sandbox/.gitignore +1 -0
- aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +139 -24
- aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +3 -1
- aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +27 -2
- aiq/tool/code_execution/register.py +7 -3
- aiq/tool/code_execution/test_code_execution_sandbox.py +414 -0
- aiq/tool/mcp/exceptions.py +142 -0
- aiq/tool/mcp/mcp_client.py +17 -3
- aiq/tool/mcp/mcp_tool.py +1 -1
- aiq/tool/register.py +1 -0
- aiq/tool/server_tools.py +2 -2
- aiq/utils/exception_handlers/automatic_retries.py +289 -0
- aiq/utils/exception_handlers/mcp.py +211 -0
- aiq/utils/io/model_processing.py +28 -0
- aiq/utils/log_utils.py +37 -0
- aiq/utils/string_utils.py +38 -0
- aiq/utils/type_converter.py +18 -2
- aiq/utils/type_utils.py +87 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/METADATA +37 -9
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/RECORD +195 -80
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/entry_points.txt +3 -0
- aiq/front_ends/fastapi/websocket.py +0 -153
- aiq/observability/async_otel_listener.py +0 -470
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/WHEEL +0 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/licenses/LICENSE.md +0 -0
- {aiqtoolkit-1.2.0a20250707.dist-info → aiqtoolkit-1.2.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import typing
|
|
17
|
+
|
|
18
|
+
from .common import BaseModelRegistryTag
|
|
19
|
+
from .common import TypedBaseModel
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ITSStrategyBaseConfig(TypedBaseModel, BaseModelRegistryTag):
    """Base configuration for an Inference Time Scaling (ITS) strategy.

    Subclasses define the concrete configuration structure for individual
    ITS strategies.
    """


ITSStrategyBaseConfigT = typing.TypeVar("ITSStrategyBaseConfigT", bound=ITSStrategyBaseConfig)
|
aiq/data_models/llm.py
CHANGED
aiq/data_models/memory.py
CHANGED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import typing
|
|
17
|
+
|
|
18
|
+
from .common import BaseModelRegistryTag
|
|
19
|
+
from .common import TypedBaseModel
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ObjectStoreBaseConfig(TypedBaseModel, BaseModelRegistryTag):
    """Base configuration class for object store providers."""


ObjectStoreBaseConfigT = typing.TypeVar("ObjectStoreBaseConfigT", bound=ObjectStoreBaseConfig)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class KeyAlreadyExistsError(Exception):
|
|
30
|
+
|
|
31
|
+
def __init__(self, key: str, additional_message: str | None = None):
|
|
32
|
+
parts = [f"Key already exists: {key}."]
|
|
33
|
+
if additional_message:
|
|
34
|
+
parts.append(additional_message)
|
|
35
|
+
super().__init__(" ".join(parts))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class NoSuchKeyError(Exception):
|
|
39
|
+
|
|
40
|
+
def __init__(self, key: str, additional_message: str | None = None):
|
|
41
|
+
parts = [f"No object found with key: {key}."]
|
|
42
|
+
if additional_message:
|
|
43
|
+
parts.append(additional_message)
|
|
44
|
+
super().__init__(" ".join(parts))
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel
|
|
17
|
+
from pydantic import Field
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RetryMixin(BaseModel):
    """Mixin class for retry configuration.

    Every field carries ``exclude=True`` so the retry settings are kept out of
    serialized model dumps.
    """

    # Master switch for the automatic-retry behavior.
    do_auto_retry: bool = Field(
        default=True,
        description="Whether to automatically retry method calls"
        " that fail with a retryable error.",
        exclude=True)
    # Maximum retry attempts per failing call.
    num_retries: int = Field(
        default=5,
        description="Number of times to retry a method call that fails"
        " with a retryable error.",
        exclude=True)
    # Default covers rate limiting (429) and common transient server errors.
    retry_on_status_codes: list[int | str] = Field(
        default_factory=lambda: [429, 500, 502, 503, 504],
        description="List of HTTP status codes that should trigger a retry.",
        exclude=True)
    # Substring match against error text; None disables substring matching.
    retry_on_errors: list[str] | None = Field(
        default_factory=lambda: ["Too Many Requests"],
        description="List of error substrings that should trigger a retry.",
        exclude=True)
|
aiq/data_models/span.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import time
|
|
18
|
+
import uuid
|
|
19
|
+
from enum import Enum
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from pydantic import BaseModel
|
|
23
|
+
from pydantic import Field
|
|
24
|
+
from pydantic import field_validator
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SpanKind(Enum):
    """Semantic category of a span; values mirror the event-type prefixes."""

    LLM = "LLM"
    TOOL = "TOOL"
    WORKFLOW = "WORKFLOW"
    TASK = "TASK"
    FUNCTION = "FUNCTION"
    CUSTOM = "CUSTOM"
    SPAN = "SPAN"
    EMBEDDER = "EMBEDDER"
    RETRIEVER = "RETRIEVER"
    AGENT = "AGENT"
    RERANKER = "RERANKER"
    GUARDRAIL = "GUARDRAIL"
    EVALUATOR = "EVALUATOR"
    UNKNOWN = "UNKNOWN"


# Every concrete kind (everything except UNKNOWN) maps its "<KIND>_START" and
# "<KIND>_END" event types back to itself. LLM additionally owns the streaming
# "LLM_NEW_TOKEN" event. Built programmatically so new kinds stay in sync.
EVENT_TYPE_TO_SPAN_KIND_MAP = {
    f"{kind.value}_{suffix}": kind
    for kind in SpanKind if kind is not SpanKind.UNKNOWN
    for suffix in ("START", "END")
}
EVENT_TYPE_TO_SPAN_KIND_MAP["LLM_NEW_TOKEN"] = SpanKind.LLM


def event_type_to_span_kind(event_type: str) -> SpanKind:
    """Convert an event type to a span kind.

    Args:
        event_type (str): The event type to convert.

    Returns:
        SpanKind: The matching span kind, or ``SpanKind.UNKNOWN`` for an
        unrecognized event type.
    """
    return EVENT_TYPE_TO_SPAN_KIND_MAP.get(event_type, SpanKind.UNKNOWN)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class SpanAttributes(Enum):
    """Canonical attribute key names attached to spans."""
    # Toolkit-specific keys use the "aiq." prefix.
    AIQ_SPAN_KIND = "aiq.span.kind"
    # Generic input/output payload keys with their MIME types.
    INPUT_VALUE = "input.value"
    INPUT_MIME_TYPE = "input.mime_type"
    # Per-call LLM token accounting.
    LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
    LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
    LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
    OUTPUT_VALUE = "output.value"
    OUTPUT_MIME_TYPE = "output.mime_type"
    # Aggregated usage statistics recorded under "aiq.usage.*".
    AIQ_USAGE_NUM_LLM_CALLS = "aiq.usage.num_llm_calls"
    AIQ_USAGE_SECONDS_BETWEEN_CALLS = "aiq.usage.seconds_between_calls"
    AIQ_USAGE_TOKEN_COUNT_PROMPT = "aiq.usage.token_count.prompt"
    AIQ_USAGE_TOKEN_COUNT_COMPLETION = "aiq.usage.token_count.completion"
    AIQ_USAGE_TOKEN_COUNT_TOTAL = "aiq.usage.token_count.total"
    AIQ_EVENT_TYPE = "aiq.event_type"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class MimeTypes(Enum):
    """MIME type constants (used as values for INPUT_MIME_TYPE / OUTPUT_MIME_TYPE)."""
    TEXT = "text/plain"
    JSON = "application/json"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class SpanStatusCode(Enum):
    """Allowed completion status codes for a span."""
    OK = "OK"
    ERROR = "ERROR"
    UNSET = "UNSET"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class SpanEvent(BaseModel):
    """A named, timestamped event recorded on a span."""
    # NOTE(review): the default is integer nanoseconds since the epoch even
    # though the field is typed ``float`` (Span.start_time uses ``int``) —
    # confirm the intended unit/type.
    timestamp: float = Field(default_factory=lambda: int(time.time() * 1e9), description="The timestamp of the event.")
    name: str = Field(description="The name of the event.")
    attributes: dict[str, Any] = Field(default_factory=dict, description="The attributes of the event.")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class SpanStatus(BaseModel):
    """Completion status of a span; defaults to OK with no message."""
    code: SpanStatusCode = Field(default=SpanStatusCode.OK, description="The status code of the span.")
    message: str | None = Field(default=None, description="The status message of the span.")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class SpanContext(BaseModel):
    """Identifiers tying a span to its trace."""
    # uuid4().int is a random 128-bit integer, matching the trace-ID width.
    trace_id: int = Field(default_factory=lambda: uuid.uuid4().int, description="The 128-bit trace ID of the span.")
    # The mask keeps only the low 64 bits of the random UUID for the span ID.
    span_id: int = Field(default_factory=lambda: uuid.uuid4().int & ((1 << 64) - 1),
                         description="The 64-bit span ID of the span.")
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class Span(BaseModel):
    """An in-memory span record: name, timing, attributes, events and status.

    Start/end times default to integer nanoseconds since the epoch; ``parent``
    links to the enclosing span, forming a tree.
    """
    name: str = Field(description="The name of the span.")
    context: SpanContext | None = Field(default=None, description="The context of the span.")
    parent: "Span | None" = Field(default=None, description="The parent span of the span.")
    start_time: int = Field(default_factory=lambda: int(time.time() * 1e9), description="The start time of the span.")
    end_time: int | None = Field(default=None, description="The end time of the span.")
    attributes: dict[str, Any] = Field(default_factory=dict, description="The attributes of the span.")
    events: list[SpanEvent] = Field(default_factory=list, description="The events of the span.")
    status: SpanStatus = Field(default_factory=SpanStatus, description="The status of the span.")

    @field_validator('context', mode='before')
    @classmethod
    def set_default_context(cls, v: SpanContext | None) -> SpanContext:
        """Set the default context if the context is not provided.

        Args:
            v (SpanContext | None): The context to set.

        Returns:
            SpanContext: The context.
        """
        # mode='before' runs ahead of pydantic's own validation, so an
        # explicit None is replaced with a fresh (random-ID) context.
        if v is None:
            return SpanContext()
        return v

    def set_attribute(self, key: str, value: Any) -> None:
        """Set the attribute of the span.

        Args:
            key (str): The key of the attribute.
            value (Any): The value of the attribute.
        """
        self.attributes[key] = value

    def add_event(self, name: str, attributes: dict[str, Any] | None = None) -> None:
        """Add an event to the span.

        Args:
            name (str): The name of the event.
            attributes (dict[str, Any] | None): The attributes of the event.
        """
        if attributes is None:
            attributes = {}
        # Rebinds a new list rather than appending in place — presumably so
        # pydantic assignment hooks observe the change; confirm before
        # rewriting this as events.append(...).
        self.events = self.events + [SpanEvent(name=name, attributes=attributes)]

    def end(self, end_time: int | None = None) -> None:
        """End the span.

        Args:
            end_time (int | None): The end time of the span; defaults to the
                current time in nanoseconds.
        """
        if end_time is None:
            end_time = int(time.time() * 1e9)
        self.end_time = end_time
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
-
from .common import BaseModelRegistryTag
|
|
19
|
-
from .common import TypedBaseModel
|
|
18
|
+
from aiq.data_models.common import BaseModelRegistryTag
|
|
19
|
+
from aiq.data_models.common import TypedBaseModel
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class TelemetryExporterBaseConfig(TypedBaseModel, BaseModelRegistryTag):
|
aiq/embedder/nim_embedder.py
CHANGED
|
@@ -24,6 +24,7 @@ from aiq.builder.builder import Builder
|
|
|
24
24
|
from aiq.builder.embedder import EmbedderProviderInfo
|
|
25
25
|
from aiq.cli.register_workflow import register_embedder_provider
|
|
26
26
|
from aiq.data_models.embedder import EmbedderBaseConfig
|
|
27
|
+
from aiq.data_models.retry_mixin import RetryMixin
|
|
27
28
|
|
|
28
29
|
allowed_truncate_values = ["NONE", "START", "END"]
|
|
29
30
|
|
|
@@ -37,7 +38,7 @@ def option_in_allowed_values(v):
|
|
|
37
38
|
TruncationOption = typing.Annotated[str, AfterValidator(option_in_allowed_values)]
|
|
38
39
|
|
|
39
40
|
|
|
40
|
-
class NIMEmbedderModelConfig(EmbedderBaseConfig, name="nim"):
|
|
41
|
+
class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
|
|
41
42
|
"""A NVIDIA Inference Microservice (NIM) embedder provider to be used with an embedder client."""
|
|
42
43
|
|
|
43
44
|
api_key: str | None = Field(default=None, description="NVIDIA API key to interact with hosted NIM.")
|
aiq/embedder/openai_embedder.py
CHANGED
|
@@ -21,9 +21,10 @@ from aiq.builder.builder import Builder
|
|
|
21
21
|
from aiq.builder.embedder import EmbedderProviderInfo
|
|
22
22
|
from aiq.cli.register_workflow import register_embedder_provider
|
|
23
23
|
from aiq.data_models.embedder import EmbedderBaseConfig
|
|
24
|
+
from aiq.data_models.retry_mixin import RetryMixin
|
|
24
25
|
|
|
25
26
|
|
|
26
|
-
class OpenAIEmbedderModelConfig(EmbedderBaseConfig, name="openai"):
|
|
27
|
+
class OpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="openai"):
|
|
27
28
|
"""An OpenAI LLM provider to be used with an LLM client."""
|
|
28
29
|
|
|
29
30
|
model_config = ConfigDict(protected_namespaces=())
|
aiq/eval/config.py
CHANGED
|
@@ -17,13 +17,18 @@ from pathlib import Path
|
|
|
17
17
|
|
|
18
18
|
from pydantic import BaseModel
|
|
19
19
|
|
|
20
|
+
from aiq.eval.evaluator.evaluator_model import EvalInput
|
|
21
|
+
from aiq.eval.evaluator.evaluator_model import EvalOutput
|
|
22
|
+
from aiq.eval.usage_stats import UsageStats
|
|
23
|
+
from aiq.profiler.data_models import ProfilerResults
|
|
24
|
+
|
|
20
25
|
|
|
21
26
|
class EvaluationRunConfig(BaseModel):
|
|
22
27
|
"""
|
|
23
28
|
Parameters used for a single evaluation run.
|
|
24
29
|
"""
|
|
25
30
|
config_file: Path
|
|
26
|
-
dataset: str | None # dataset file path can be specified in the config file
|
|
31
|
+
dataset: str | None = None # dataset file path can be specified in the config file
|
|
27
32
|
result_json_path: str = "$"
|
|
28
33
|
skip_workflow: bool = False
|
|
29
34
|
skip_completed_entries: bool = False
|
|
@@ -31,6 +36,14 @@ class EvaluationRunConfig(BaseModel):
|
|
|
31
36
|
endpoint_timeout: int = 300
|
|
32
37
|
reps: int = 1
|
|
33
38
|
override: tuple[tuple[str, str], ...] = ()
|
|
39
|
+
# If false, the output will not be written to the output directory. This is
|
|
40
|
+
# useful when running evaluation via another tool.
|
|
41
|
+
write_output: bool = True
|
|
42
|
+
# if true, the dataset is adjusted to a multiple of the concurrency
|
|
43
|
+
adjust_dataset_size: bool = False
|
|
44
|
+
# number of passes at each concurrency, if 0 the dataset is adjusted to a multiple of the
|
|
45
|
+
# concurrency. The is only used if adjust_dataset_size is true
|
|
46
|
+
num_passes: int = 0
|
|
34
47
|
|
|
35
48
|
|
|
36
49
|
class EvaluationRunOutput(BaseModel):
|
|
@@ -40,3 +53,8 @@ class EvaluationRunOutput(BaseModel):
|
|
|
40
53
|
workflow_output_file: Path | None
|
|
41
54
|
evaluator_output_files: list[Path]
|
|
42
55
|
workflow_interrupted: bool
|
|
56
|
+
|
|
57
|
+
eval_input: EvalInput
|
|
58
|
+
evaluation_results: list[tuple[str, EvalOutput]]
|
|
59
|
+
usage_stats: UsageStats | None = None
|
|
60
|
+
profiler_results: ProfilerResults
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
import json
|
|
17
|
+
import math
|
|
17
18
|
|
|
18
19
|
import pandas as pd
|
|
19
20
|
|
|
@@ -33,12 +34,23 @@ class DatasetHandler:
|
|
|
33
34
|
One DatasetHandler object is needed for each dataset to be evaluated.
|
|
34
35
|
"""
|
|
35
36
|
|
|
36
|
-
def __init__(self,
|
|
37
|
+
def __init__(self,
             dataset_config: EvalDatasetConfig,
             reps: int,
             concurrency: int,
             num_passes: int | None = None,
             adjust_dataset_size: bool = False):
    """Initialize the handler for a single evaluation dataset.

    Args:
        dataset_config: Dataset source, id key and filter settings.
        reps: Number of dataset repetitions (used downstream where it is
            mutually exclusive with adjust_dataset_size).
        concurrency: Evaluation concurrency used when resizing the dataset.
        num_passes: Passes at the given concurrency; 0 (or None) means the
            dataset is trimmed to the largest multiple of concurrency.
        adjust_dataset_size: If True, resize the dataset to a multiple of
            concurrency.
    """
    # NOTE(review): local import — presumably avoids a circular import at
    # module load time; confirm.
    from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter

    self.dataset_config = dataset_config
    self.dataset_filter = DatasetFilter(dataset_config.filter)
    self.reps = reps

    # number of passes at specific concurrency
    self.concurrency = concurrency
    self.num_passes = num_passes
    self.adjust_dataset_size = adjust_dataset_size

    # Helpers
    self.intermediate_step_adapter = IntermediateStepAdapter()
|
|
@@ -109,6 +121,63 @@ class DatasetHandler:
|
|
|
109
121
|
|
|
110
122
|
return input_df
|
|
111
123
|
|
|
124
|
+
def adjust_dataset(self, input_df: pd.DataFrame) -> pd.DataFrame:
    """
    Adjust the dataset so its length is a multiple of concurrency.

    If num_passes > 0:
        dataset size is adjusted to concurrency * num_passes
    else:
        dataset size is adjusted to the largest multiple of concurrency
        that is less than or equal to the current dataset size

    Args:
        input_df: The (already filtered and de-duplicated) dataset.

    Returns:
        A dataframe of exactly the computed target size. Rows are replicated
        with an "_rep<i>" id suffix when the dataset must grow.

    Raises:
        ValueError: If concurrency <= 0, num_passes < 0, or the dataset is
            empty.
    """
    if self.concurrency <= 0:
        raise ValueError("Concurrency must be > 0")

    # self.num_passes defaults to None in __init__; treat None as 0.
    # (Comparing None against an int raised TypeError before this fix.)
    num_passes = 0 if self.num_passes is None else self.num_passes
    if num_passes < 0:
        raise ValueError("num_passes must be >= 0")

    original_size = input_df.shape[0]
    # An empty dataset cannot be replicated (the copy count below would
    # divide by zero) — fail with a clear error instead.
    if original_size == 0:
        raise ValueError("Input dataset is empty; cannot adjust its size.")

    # Calculate target size
    if num_passes > 0:
        # When num_passes is specified, always use concurrency * num_passes.
        # This respects the user's intent for an exact number of passes.
        target_size = self.concurrency * num_passes
    else:
        # When num_passes = 0, use the largest multiple of concurrency <= original_size.
        # If original_size < concurrency, we need at least concurrency rows.
        if original_size >= self.concurrency:
            target_size = (original_size // self.concurrency) * self.concurrency
        else:
            target_size = self.concurrency

    # Defensive: unreachable given the checks above, but fail loudly if the
    # invariants ever change.
    if target_size == 0:
        raise ValueError("Input dataset too small for even one batch at given concurrency.")

    id_col = self.dataset_config.id_key

    # If we need more rows than we have, replicate the dataset
    if original_size < target_size:
        # Clean existing _rep suffix if present
        input_df[id_col] = input_df[id_col].astype(str).str.replace(r"_rep\d+$", "", regex=True)

        # Calculate how many complete copies we need
        copies_needed = math.ceil(target_size / original_size)

        # Create the replicated dataframe; all but the first copy get a
        # "_rep<i>" suffix so ids stay unique.
        replicated_dfs = []
        for i in range(copies_needed):
            df_copy = input_df.copy()
            if i > 0:  # Add suffix to all but the first copy
                df_copy[id_col] = df_copy[id_col].astype(str) + f"_rep{i}"
            replicated_dfs.append(df_copy)

        input_df = pd.concat(replicated_dfs, ignore_index=True)

    # Return exactly the target size
    return input_df.head(target_size)
|
|
180
|
+
|
|
112
181
|
def get_eval_input_from_dataset(self, dataset: str) -> EvalInput:
|
|
113
182
|
# read the dataset and convert it to EvalInput
|
|
114
183
|
|
|
@@ -127,9 +196,14 @@ class DatasetHandler:
|
|
|
127
196
|
input_df = self.dataset_filter.apply_filters(input_df)
|
|
128
197
|
input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
|
|
129
198
|
|
|
199
|
+
if self.reps > 1 and self.adjust_dataset_size:
|
|
200
|
+
raise ValueError("reps and adjust_dataset_size are mutually exclusive")
|
|
201
|
+
|
|
130
202
|
# If more than one repetition is needed, replicate the rows
|
|
131
203
|
if self.reps > 1:
|
|
132
204
|
input_df = self.setup_reps(input_df)
|
|
205
|
+
elif self.adjust_dataset_size:
|
|
206
|
+
input_df = self.adjust_dataset(input_df)
|
|
133
207
|
|
|
134
208
|
# Convert the DataFrame to a list of EvalInput objects
|
|
135
209
|
return self.get_eval_input_from_df(input_df)
|
aiq/eval/evaluate.py
CHANGED
|
@@ -99,12 +99,34 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
99
99
|
max_timestamp = max(step.event_timestamp for step in item.trajectory)
|
|
100
100
|
runtime = max_timestamp - min_timestamp
|
|
101
101
|
else:
|
|
102
|
+
min_timestamp = 0.0
|
|
103
|
+
max_timestamp = 0.0
|
|
102
104
|
runtime = 0.0
|
|
103
105
|
|
|
106
|
+
# find llm latency by calculating p95 of all llm calls
|
|
107
|
+
llm_latencies = []
|
|
108
|
+
previous_llm_start_time = None
|
|
109
|
+
for step in steps:
|
|
110
|
+
if step.event_type == "LLM_START":
|
|
111
|
+
previous_llm_start_time = step.event_timestamp
|
|
112
|
+
elif step.event_type == "LLM_END" and previous_llm_start_time is not None:
|
|
113
|
+
llm_latencies.append(step.event_timestamp - previous_llm_start_time)
|
|
114
|
+
previous_llm_start_time = None
|
|
115
|
+
|
|
116
|
+
# Calculate p95 LLM latency (or 0 if no LLM calls)
|
|
117
|
+
if llm_latencies:
|
|
118
|
+
import numpy as np
|
|
119
|
+
llm_latency = float(np.percentile(llm_latencies, 95))
|
|
120
|
+
else:
|
|
121
|
+
llm_latency = 0.0
|
|
122
|
+
|
|
104
123
|
# add the usage stats to the usage stats dict
|
|
105
124
|
self.usage_stats.usage_stats_items[item.id] = UsageStatsItem(usage_stats_per_llm=usage_stats_per_llm,
|
|
106
125
|
runtime=runtime,
|
|
107
|
-
total_tokens=total_tokens
|
|
126
|
+
total_tokens=total_tokens,
|
|
127
|
+
min_timestamp=min_timestamp,
|
|
128
|
+
max_timestamp=max_timestamp,
|
|
129
|
+
llm_latency=llm_latency)
|
|
108
130
|
return self.usage_stats.usage_stats_items[item.id]
|
|
109
131
|
|
|
110
132
|
async def run_workflow_local(self, session_manager: AIQSessionManager):
|
|
@@ -221,7 +243,9 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
221
243
|
for input_item in self.eval_input.eval_input_items:
|
|
222
244
|
all_stats.append(input_item.trajectory)
|
|
223
245
|
|
|
224
|
-
profiler_runner = ProfilerRunner(self.eval_config.general.profiler,
|
|
246
|
+
profiler_runner = ProfilerRunner(self.eval_config.general.profiler,
|
|
247
|
+
self.eval_config.general.output_dir,
|
|
248
|
+
write_output=self.config.write_output)
|
|
225
249
|
|
|
226
250
|
return await profiler_runner.run(all_stats)
|
|
227
251
|
|
|
@@ -308,6 +332,11 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
308
332
|
self.evaluator_output_files.append(output_file)
|
|
309
333
|
logger.info("Evaluation results written to %s", output_file)
|
|
310
334
|
|
|
335
|
+
def publish_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
|
|
336
|
+
"""Publish the output"""
|
|
337
|
+
if self.config.write_output:
|
|
338
|
+
self.write_output(dataset_handler, profiler_results)
|
|
339
|
+
|
|
311
340
|
if self.workflow_interrupted:
|
|
312
341
|
# Issue a warning if the workflow was not completed on all datasets
|
|
313
342
|
msg = ("Workflow execution was interrupted due to an error. The results may be incomplete. "
|
|
@@ -415,7 +444,11 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
415
444
|
workflow_interrupted=self.workflow_interrupted,
|
|
416
445
|
)
|
|
417
446
|
|
|
418
|
-
dataset_handler = DatasetHandler(dataset_config=dataset_config,
|
|
447
|
+
dataset_handler = DatasetHandler(dataset_config=dataset_config,
|
|
448
|
+
reps=self.config.reps,
|
|
449
|
+
concurrency=self.eval_config.general.max_concurrency,
|
|
450
|
+
num_passes=self.config.num_passes,
|
|
451
|
+
adjust_dataset_size=self.config.adjust_dataset_size)
|
|
419
452
|
self.eval_input = dataset_handler.get_eval_input_from_dataset(self.config.dataset)
|
|
420
453
|
if not self.eval_input.eval_input_items:
|
|
421
454
|
logger.info("Dataset is empty. Nothing to evaluate.")
|
|
@@ -447,8 +480,16 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
447
480
|
# Profile the workflow
|
|
448
481
|
profiler_results = await self.profile_workflow()
|
|
449
482
|
|
|
450
|
-
#
|
|
451
|
-
self.
|
|
483
|
+
# compute total runtime
|
|
484
|
+
if self.usage_stats.usage_stats_items:
|
|
485
|
+
self.usage_stats.total_runtime = max(self.usage_stats.usage_stats_items.values(),
|
|
486
|
+
key=lambda x: x.max_timestamp).max_timestamp - \
|
|
487
|
+
min(self.usage_stats.usage_stats_items.values(), key=lambda x: x.min_timestamp).min_timestamp
|
|
488
|
+
else:
|
|
489
|
+
self.usage_stats.total_runtime = 0.0
|
|
490
|
+
|
|
491
|
+
# Publish the results
|
|
492
|
+
self.publish_output(dataset_handler, profiler_results)
|
|
452
493
|
|
|
453
494
|
# Run custom scripts and upload evaluation outputs to S3
|
|
454
495
|
if self.eval_config.general.output:
|
|
@@ -456,8 +497,10 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
456
497
|
output_uploader.run_custom_scripts()
|
|
457
498
|
await output_uploader.upload_directory()
|
|
458
499
|
|
|
459
|
-
return EvaluationRunOutput(
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
500
|
+
return EvaluationRunOutput(workflow_output_file=self.workflow_output_file,
|
|
501
|
+
evaluator_output_files=self.evaluator_output_files,
|
|
502
|
+
workflow_interrupted=self.workflow_interrupted,
|
|
503
|
+
eval_input=self.eval_input,
|
|
504
|
+
evaluation_results=self.evaluation_results,
|
|
505
|
+
usage_stats=self.usage_stats,
|
|
506
|
+
profiler_results=profiler_results)
|