PyPI - aiqtoolkit - Versions diffs - 1.1.0rc5__py3-none-any.whl → 1.2.0__py3-none-any.whl - Mend

aiqtoolkit 1.1.0rc5__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiqtoolkit might be problematic. Click here for more details.

Files changed (319) hide show

aiqtoolkit-1.2.0.dist-info/METADATA +29 -0
aiqtoolkit-1.2.0.dist-info/RECORD +4 -0
{aiqtoolkit-1.1.0rc5.dist-info → aiqtoolkit-1.2.0.dist-info}/WHEEL +1 -1
aiqtoolkit-1.2.0.dist-info/top_level.txt +1 -0
aiq/agent/__init__.py +0 -0
aiq/agent/base.py +0 -76
aiq/agent/dual_node.py +0 -67
aiq/agent/react_agent/__init__.py +0 -0
aiq/agent/react_agent/agent.py +0 -322
aiq/agent/react_agent/output_parser.py +0 -104
aiq/agent/react_agent/prompt.py +0 -46
aiq/agent/react_agent/register.py +0 -148
aiq/agent/reasoning_agent/__init__.py +0 -0
aiq/agent/reasoning_agent/reasoning_agent.py +0 -224
aiq/agent/register.py +0 -23
aiq/agent/rewoo_agent/__init__.py +0 -0
aiq/agent/rewoo_agent/agent.py +0 -410
aiq/agent/rewoo_agent/prompt.py +0 -108
aiq/agent/rewoo_agent/register.py +0 -158
aiq/agent/tool_calling_agent/__init__.py +0 -0
aiq/agent/tool_calling_agent/agent.py +0 -123
aiq/agent/tool_calling_agent/register.py +0 -105
aiq/builder/__init__.py +0 -0
aiq/builder/builder.py +0 -223
aiq/builder/component_utils.py +0 -303
aiq/builder/context.py +0 -227
aiq/builder/embedder.py +0 -24
aiq/builder/eval_builder.py +0 -120
aiq/builder/evaluator.py +0 -29
aiq/builder/framework_enum.py +0 -24
aiq/builder/front_end.py +0 -73
aiq/builder/function.py +0 -297
aiq/builder/function_base.py +0 -376
aiq/builder/function_info.py +0 -627
aiq/builder/intermediate_step_manager.py +0 -135
aiq/builder/llm.py +0 -25
aiq/builder/retriever.py +0 -25
aiq/builder/user_interaction_manager.py +0 -71
aiq/builder/workflow.py +0 -143
aiq/builder/workflow_builder.py +0 -757
aiq/cli/__init__.py +0 -14
aiq/cli/cli_utils/__init__.py +0 -0
aiq/cli/cli_utils/config_override.py +0 -231
aiq/cli/cli_utils/validation.py +0 -37
aiq/cli/commands/__init__.py +0 -0
aiq/cli/commands/configure/__init__.py +0 -0
aiq/cli/commands/configure/channel/__init__.py +0 -0
aiq/cli/commands/configure/channel/add.py +0 -28
aiq/cli/commands/configure/channel/channel.py +0 -36
aiq/cli/commands/configure/channel/remove.py +0 -30
aiq/cli/commands/configure/channel/update.py +0 -30
aiq/cli/commands/configure/configure.py +0 -33
aiq/cli/commands/evaluate.py +0 -139
aiq/cli/commands/info/__init__.py +0 -14
aiq/cli/commands/info/info.py +0 -39
aiq/cli/commands/info/list_channels.py +0 -32
aiq/cli/commands/info/list_components.py +0 -129
aiq/cli/commands/info/list_mcp.py +0 -126
aiq/cli/commands/registry/__init__.py +0 -14
aiq/cli/commands/registry/publish.py +0 -88
aiq/cli/commands/registry/pull.py +0 -118
aiq/cli/commands/registry/registry.py +0 -38
aiq/cli/commands/registry/remove.py +0 -108
aiq/cli/commands/registry/search.py +0 -155
aiq/cli/commands/start.py +0 -250
aiq/cli/commands/uninstall.py +0 -83
aiq/cli/commands/validate.py +0 -47
aiq/cli/commands/workflow/__init__.py +0 -14
aiq/cli/commands/workflow/templates/__init__.py.j2 +0 -0
aiq/cli/commands/workflow/templates/config.yml.j2 +0 -16
aiq/cli/commands/workflow/templates/pyproject.toml.j2 +0 -22
aiq/cli/commands/workflow/templates/register.py.j2 +0 -5
aiq/cli/commands/workflow/templates/workflow.py.j2 +0 -36
aiq/cli/commands/workflow/workflow.py +0 -37
aiq/cli/commands/workflow/workflow_commands.py +0 -313
aiq/cli/entrypoint.py +0 -133
aiq/cli/main.py +0 -44
aiq/cli/register_workflow.py +0 -408
aiq/cli/type_registry.py +0 -879
aiq/data_models/__init__.py +0 -14
aiq/data_models/api_server.py +0 -588
aiq/data_models/common.py +0 -143
aiq/data_models/component.py +0 -46
aiq/data_models/component_ref.py +0 -135
aiq/data_models/config.py +0 -349
aiq/data_models/dataset_handler.py +0 -122
aiq/data_models/discovery_metadata.py +0 -286
aiq/data_models/embedder.py +0 -26
aiq/data_models/evaluate.py +0 -104
aiq/data_models/evaluator.py +0 -26
aiq/data_models/front_end.py +0 -26
aiq/data_models/function.py +0 -30
aiq/data_models/function_dependencies.py +0 -64
aiq/data_models/interactive.py +0 -237
aiq/data_models/intermediate_step.py +0 -269
aiq/data_models/invocation_node.py +0 -38
aiq/data_models/llm.py +0 -26
aiq/data_models/logging.py +0 -26
aiq/data_models/memory.py +0 -26
aiq/data_models/profiler.py +0 -53
aiq/data_models/registry_handler.py +0 -26
aiq/data_models/retriever.py +0 -30
aiq/data_models/step_adaptor.py +0 -64
aiq/data_models/streaming.py +0 -33
aiq/data_models/swe_bench_model.py +0 -54
aiq/data_models/telemetry_exporter.py +0 -26
aiq/embedder/__init__.py +0 -0
aiq/embedder/langchain_client.py +0 -41
aiq/embedder/nim_embedder.py +0 -58
aiq/embedder/openai_embedder.py +0 -42
aiq/embedder/register.py +0 -24
aiq/eval/__init__.py +0 -14
aiq/eval/config.py +0 -42
aiq/eval/dataset_handler/__init__.py +0 -0
aiq/eval/dataset_handler/dataset_downloader.py +0 -106
aiq/eval/dataset_handler/dataset_filter.py +0 -52
aiq/eval/dataset_handler/dataset_handler.py +0 -169
aiq/eval/evaluate.py +0 -325
aiq/eval/evaluator/__init__.py +0 -14
aiq/eval/evaluator/evaluator_model.py +0 -44
aiq/eval/intermediate_step_adapter.py +0 -93
aiq/eval/rag_evaluator/__init__.py +0 -0
aiq/eval/rag_evaluator/evaluate.py +0 -138
aiq/eval/rag_evaluator/register.py +0 -138
aiq/eval/register.py +0 -23
aiq/eval/remote_workflow.py +0 -128
aiq/eval/runtime_event_subscriber.py +0 -52
aiq/eval/swe_bench_evaluator/__init__.py +0 -0
aiq/eval/swe_bench_evaluator/evaluate.py +0 -215
aiq/eval/swe_bench_evaluator/register.py +0 -36
aiq/eval/trajectory_evaluator/__init__.py +0 -0
aiq/eval/trajectory_evaluator/evaluate.py +0 -118
aiq/eval/trajectory_evaluator/register.py +0 -40
aiq/eval/tunable_rag_evaluator/__init__.py +0 -0
aiq/eval/tunable_rag_evaluator/evaluate.py +0 -263
aiq/eval/tunable_rag_evaluator/register.py +0 -50
aiq/eval/utils/__init__.py +0 -0
aiq/eval/utils/output_uploader.py +0 -131
aiq/eval/utils/tqdm_position_registry.py +0 -40
aiq/front_ends/__init__.py +0 -14
aiq/front_ends/console/__init__.py +0 -14
aiq/front_ends/console/console_front_end_config.py +0 -32
aiq/front_ends/console/console_front_end_plugin.py +0 -107
aiq/front_ends/console/register.py +0 -25
aiq/front_ends/cron/__init__.py +0 -14
aiq/front_ends/fastapi/__init__.py +0 -14
aiq/front_ends/fastapi/fastapi_front_end_config.py +0 -150
aiq/front_ends/fastapi/fastapi_front_end_plugin.py +0 -103
aiq/front_ends/fastapi/fastapi_front_end_plugin_worker.py +0 -607
aiq/front_ends/fastapi/intermediate_steps_subscriber.py +0 -80
aiq/front_ends/fastapi/job_store.py +0 -161
aiq/front_ends/fastapi/main.py +0 -70
aiq/front_ends/fastapi/message_handler.py +0 -279
aiq/front_ends/fastapi/message_validator.py +0 -345
aiq/front_ends/fastapi/register.py +0 -25
aiq/front_ends/fastapi/response_helpers.py +0 -195
aiq/front_ends/fastapi/step_adaptor.py +0 -320
aiq/front_ends/fastapi/websocket.py +0 -148
aiq/front_ends/mcp/__init__.py +0 -14
aiq/front_ends/mcp/mcp_front_end_config.py +0 -32
aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
aiq/front_ends/mcp/register.py +0 -27
aiq/front_ends/mcp/tool_converter.py +0 -242
aiq/front_ends/register.py +0 -22
aiq/front_ends/simple_base/__init__.py +0 -14
aiq/front_ends/simple_base/simple_front_end_plugin_base.py +0 -52
aiq/llm/__init__.py +0 -0
aiq/llm/nim_llm.py +0 -45
aiq/llm/openai_llm.py +0 -45
aiq/llm/register.py +0 -22
aiq/llm/utils/__init__.py +0 -14
aiq/llm/utils/env_config_value.py +0 -94
aiq/llm/utils/error.py +0 -17
aiq/memory/__init__.py +0 -20
aiq/memory/interfaces.py +0 -183
aiq/memory/models.py +0 -112
aiq/meta/module_to_distro.json +0 -3
aiq/meta/pypi.md +0 -58
aiq/observability/__init__.py +0 -0
aiq/observability/async_otel_listener.py +0 -429
aiq/observability/register.py +0 -99
aiq/plugins/.namespace +0 -1
aiq/profiler/__init__.py +0 -0
aiq/profiler/callbacks/__init__.py +0 -0
aiq/profiler/callbacks/agno_callback_handler.py +0 -295
aiq/profiler/callbacks/base_callback_class.py +0 -20
aiq/profiler/callbacks/langchain_callback_handler.py +0 -278
aiq/profiler/callbacks/llama_index_callback_handler.py +0 -205
aiq/profiler/callbacks/semantic_kernel_callback_handler.py +0 -238
aiq/profiler/callbacks/token_usage_base_model.py +0 -27
aiq/profiler/data_frame_row.py +0 -51
aiq/profiler/decorators/__init__.py +0 -0
aiq/profiler/decorators/framework_wrapper.py +0 -131
aiq/profiler/decorators/function_tracking.py +0 -254
aiq/profiler/forecasting/__init__.py +0 -0
aiq/profiler/forecasting/config.py +0 -18
aiq/profiler/forecasting/model_trainer.py +0 -75
aiq/profiler/forecasting/models/__init__.py +0 -22
aiq/profiler/forecasting/models/forecasting_base_model.py +0 -40
aiq/profiler/forecasting/models/linear_model.py +0 -196
aiq/profiler/forecasting/models/random_forest_regressor.py +0 -268
aiq/profiler/inference_metrics_model.py +0 -25
aiq/profiler/inference_optimization/__init__.py +0 -0
aiq/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
aiq/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +0 -452
aiq/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +0 -258
aiq/profiler/inference_optimization/data_models.py +0 -386
aiq/profiler/inference_optimization/experimental/__init__.py +0 -0
aiq/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +0 -468
aiq/profiler/inference_optimization/experimental/prefix_span_analysis.py +0 -405
aiq/profiler/inference_optimization/llm_metrics.py +0 -212
aiq/profiler/inference_optimization/prompt_caching.py +0 -163
aiq/profiler/inference_optimization/token_uniqueness.py +0 -107
aiq/profiler/inference_optimization/workflow_runtimes.py +0 -72
aiq/profiler/intermediate_property_adapter.py +0 -102
aiq/profiler/profile_runner.py +0 -433
aiq/profiler/utils.py +0 -184
aiq/registry_handlers/__init__.py +0 -0
aiq/registry_handlers/local/__init__.py +0 -0
aiq/registry_handlers/local/local_handler.py +0 -176
aiq/registry_handlers/local/register_local.py +0 -37
aiq/registry_handlers/metadata_factory.py +0 -60
aiq/registry_handlers/package_utils.py +0 -198
aiq/registry_handlers/pypi/__init__.py +0 -0
aiq/registry_handlers/pypi/pypi_handler.py +0 -251
aiq/registry_handlers/pypi/register_pypi.py +0 -40
aiq/registry_handlers/register.py +0 -21
aiq/registry_handlers/registry_handler_base.py +0 -157
aiq/registry_handlers/rest/__init__.py +0 -0
aiq/registry_handlers/rest/register_rest.py +0 -56
aiq/registry_handlers/rest/rest_handler.py +0 -237
aiq/registry_handlers/schemas/__init__.py +0 -0
aiq/registry_handlers/schemas/headers.py +0 -42
aiq/registry_handlers/schemas/package.py +0 -68
aiq/registry_handlers/schemas/publish.py +0 -63
aiq/registry_handlers/schemas/pull.py +0 -82
aiq/registry_handlers/schemas/remove.py +0 -36
aiq/registry_handlers/schemas/search.py +0 -91
aiq/registry_handlers/schemas/status.py +0 -47
aiq/retriever/__init__.py +0 -0
aiq/retriever/interface.py +0 -37
aiq/retriever/milvus/__init__.py +0 -14
aiq/retriever/milvus/register.py +0 -81
aiq/retriever/milvus/retriever.py +0 -228
aiq/retriever/models.py +0 -74
aiq/retriever/nemo_retriever/__init__.py +0 -14
aiq/retriever/nemo_retriever/register.py +0 -60
aiq/retriever/nemo_retriever/retriever.py +0 -190
aiq/retriever/register.py +0 -22
aiq/runtime/__init__.py +0 -14
aiq/runtime/loader.py +0 -188
aiq/runtime/runner.py +0 -176
aiq/runtime/session.py +0 -140
aiq/runtime/user_metadata.py +0 -131
aiq/settings/__init__.py +0 -0
aiq/settings/global_settings.py +0 -318
aiq/test/.namespace +0 -1
aiq/tool/__init__.py +0 -0
aiq/tool/code_execution/__init__.py +0 -0
aiq/tool/code_execution/code_sandbox.py +0 -188
aiq/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -60
aiq/tool/code_execution/local_sandbox/__init__.py +0 -13
aiq/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -83
aiq/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -4
aiq/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -25
aiq/tool/code_execution/register.py +0 -70
aiq/tool/code_execution/utils.py +0 -100
aiq/tool/datetime_tools.py +0 -42
aiq/tool/document_search.py +0 -141
aiq/tool/github_tools/__init__.py +0 -0
aiq/tool/github_tools/create_github_commit.py +0 -133
aiq/tool/github_tools/create_github_issue.py +0 -87
aiq/tool/github_tools/create_github_pr.py +0 -106
aiq/tool/github_tools/get_github_file.py +0 -106
aiq/tool/github_tools/get_github_issue.py +0 -166
aiq/tool/github_tools/get_github_pr.py +0 -256
aiq/tool/github_tools/update_github_issue.py +0 -100
aiq/tool/mcp/__init__.py +0 -14
aiq/tool/mcp/mcp_client.py +0 -220
aiq/tool/mcp/mcp_tool.py +0 -95
aiq/tool/memory_tools/__init__.py +0 -0
aiq/tool/memory_tools/add_memory_tool.py +0 -79
aiq/tool/memory_tools/delete_memory_tool.py +0 -67
aiq/tool/memory_tools/get_memory_tool.py +0 -72
aiq/tool/nvidia_rag.py +0 -95
aiq/tool/register.py +0 -37
aiq/tool/retriever.py +0 -89
aiq/tool/server_tools.py +0 -63
aiq/utils/__init__.py +0 -0
aiq/utils/data_models/__init__.py +0 -0
aiq/utils/data_models/schema_validator.py +0 -58
aiq/utils/debugging_utils.py +0 -43
aiq/utils/exception_handlers/__init__.py +0 -0
aiq/utils/exception_handlers/schemas.py +0 -114
aiq/utils/io/__init__.py +0 -0
aiq/utils/io/yaml_tools.py +0 -119
aiq/utils/metadata_utils.py +0 -74
aiq/utils/optional_imports.py +0 -142
aiq/utils/producer_consumer_queue.py +0 -178
aiq/utils/reactive/__init__.py +0 -0
aiq/utils/reactive/base/__init__.py +0 -0
aiq/utils/reactive/base/observable_base.py +0 -65
aiq/utils/reactive/base/observer_base.py +0 -55
aiq/utils/reactive/base/subject_base.py +0 -79
aiq/utils/reactive/observable.py +0 -59
aiq/utils/reactive/observer.py +0 -76
aiq/utils/reactive/subject.py +0 -131
aiq/utils/reactive/subscription.py +0 -49
aiq/utils/settings/__init__.py +0 -0
aiq/utils/settings/global_settings.py +0 -197
aiq/utils/type_converter.py +0 -232
aiq/utils/type_utils.py +0 -397
aiq/utils/url_utils.py +0 -27
aiqtoolkit-1.1.0rc5.dist-info/METADATA +0 -331
aiqtoolkit-1.1.0rc5.dist-info/RECORD +0 -316
aiqtoolkit-1.1.0rc5.dist-info/entry_points.txt +0 -17
aiqtoolkit-1.1.0rc5.dist-info/licenses/LICENSE-3rd-party.txt +0 -3686
aiqtoolkit-1.1.0rc5.dist-info/licenses/LICENSE.md +0 -201
aiqtoolkit-1.1.0rc5.dist-info/top_level.txt +0 -1

aiq/eval/rag_evaluator/evaluate.py DELETED Viewed

@@ -1,138 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from collections.abc import Sequence
-from ragas import EvaluationDataset
-from ragas import SingleTurnSample
-from ragas.dataset_schema import EvaluationResult
-from ragas.llms import LangchainLLMWrapper
-from ragas.metrics import Metric
-from tqdm import tqdm
-from aiq.eval.evaluator.evaluator_model import EvalInput
-from aiq.eval.evaluator.evaluator_model import EvalOutput
-from aiq.eval.evaluator.evaluator_model import EvalOutputItem
-from aiq.eval.utils.tqdm_position_registry import TqdmPositionRegistry
-logger = logging.getLogger(__name__)
-class RAGEvaluator:
-    def __init__(self, evaluator_llm: LangchainLLMWrapper, metrics: Sequence[Metric]):
-        self.evaluator_llm = evaluator_llm
-        self.metrics = metrics
-    @staticmethod
-    def eval_input_to_ragas(eval_input: EvalInput) -> EvaluationDataset:
-        """Converts EvalInput into a Ragas-compatible EvaluationDataset."""
-        from aiq.eval.intermediate_step_adapter import IntermediateStepAdapter
-        samples = []
-        intermediate_step_adapter = IntermediateStepAdapter()
-        for item in eval_input.eval_input_items:
-            # Extract required fields from EvalInputItem
-            user_input = item.input_obj  # Assumes input_obj is a string (modify if needed)
-            reference = item.expected_output_obj  # Reference correct answer
-            response = item.output_obj  # Model's generated response
-            # Handle context extraction from trajectory if available
-            reference_contexts = [""]  # Default to empty context
-            # implement context extraction from expected_trajectory
-            retrieved_contexts = intermediate_step_adapter.get_context(item.trajectory)
-            # implement context extraction from expected_trajectory
-            # Create a SingleTurnSample
-            sample = SingleTurnSample(
-                user_input=user_input,
-                reference=reference,
-                response=response,
-                reference_contexts=reference_contexts,
-                retrieved_contexts=retrieved_contexts,
-            )
-            samples.append(sample)
-        return EvaluationDataset(samples=samples)
-    def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: EvaluationResult | None) -> EvalOutput:
-        """Converts the ragas EvaluationResult to aiq EvalOutput"""
-        if not results_dataset:
-            logger.error("Ragas evaluation failed with no results")
-            return EvalOutput(average_score=0.0, eval_output_items=[])
-        scores: list[dict[str, float]] = results_dataset.scores
-        if not scores:
-            logger.error("Ragas returned empty score list")
-            return EvalOutput(average_score=0.0, eval_output_items=[])
-        # Convert from list of dicts to dict of lists
-        scores_dict = {metric: [score[metric] for score in scores] for metric in scores[0]}
-        # Compute the average of each metric
-        average_scores = {metric: sum(values) / len(values) for metric, values in scores_dict.items()}
-        # Extract the first (and only) metric's average score
-        first_avg_score = next(iter(average_scores.values()))
-        first_metric_name = list(scores_dict.keys())[0]
-        df = results_dataset.to_pandas()
-        # Get id from eval_input if df size matches number of eval_input_items
-        if len(eval_input.eval_input_items) >= len(df):
-            ids = [item.id for item in eval_input.eval_input_items]  # Extract IDs
-        else:
-            ids = df["user_input"].tolist()  # Use "user_input" as ID fallback
-        # Construct EvalOutputItem list
-        eval_output_items = [
-            EvalOutputItem(
-                id=ids[i],
-                score=getattr(row, first_metric_name, 0.0),
-                reasoning={
-                    key:
-                        getattr(row, key, None)  # Use getattr to safely access attributes
-                    for key in ["user_input", "reference", "response", "retrieved_contexts"]
-                }) for i, row in enumerate(df.itertuples(index=False))
-        ]
-        # Return EvalOutput
-        return EvalOutput(average_score=first_avg_score, eval_output_items=eval_output_items)
-    async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
-        """Run Ragas metrics evaluation on the provided EvalInput"""
-        from ragas import evaluate as ragas_evaluate
-        ragas_dataset = self.eval_input_to_ragas(eval_input)
-        tqdm_position = TqdmPositionRegistry.claim()
-        first_metric_name = self.metrics[0].name
-        pbar = tqdm(total=len(ragas_dataset), desc=f"Evaluating Ragas {first_metric_name}", position=tqdm_position)
-        try:
-            results_dataset = ragas_evaluate(dataset=ragas_dataset,
-                                             metrics=self.metrics,
-                                             show_progress=True,
-                                             llm=self.evaluator_llm,
-                                             _pbar=pbar)
-        except Exception as e:
-            # On exception we still continue with other evaluators. Log and return an avg_score of 0.0
-            logger.exception("Error evaluating ragas metric, Error: %s", e, exc_info=True)
-            results_dataset = None
-        finally:
-            pbar.close()
-            TqdmPositionRegistry.release(tqdm_position)
-        return self.ragas_to_eval_output(eval_input, results_dataset)

aiq/eval/rag_evaluator/register.py DELETED Viewed

@@ -1,138 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-from pydantic import BaseModel
-from pydantic import Field
-from pydantic import model_validator
-from aiq.builder.builder import EvalBuilder
-from aiq.builder.evaluator import EvaluatorInfo
-from aiq.builder.framework_enum import LLMFrameworkEnum
-from aiq.cli.register_workflow import register_evaluator
-from aiq.data_models.evaluator import EvaluatorBaseConfig
-from aiq.eval.evaluator.evaluator_model import EvalInput
-from aiq.eval.evaluator.evaluator_model import EvalOutput
-logger = logging.getLogger(__name__)
-class RagasMetricConfig(BaseModel):
-    ''' RAGAS metrics configuration
-    skip: Allows the metric config to be present but not used
-    kwargs: Additional arguments to pass to the metric's callable
-    '''
-    skip: bool = False
-    # kwargs specific to the metric's callable
-    kwargs: dict | None = None
-class RagasEvaluatorConfig(EvaluatorBaseConfig, name="ragas"):
-    """Evaluation using RAGAS metrics."""
-    llm_name: str = Field(description="LLM as a judge.")
-    # Ragas metric
-    metric: str | dict[str, RagasMetricConfig] = Field(default="AnswerAccuracy",
-                                                       description="RAGAS metric callable with optional 'kwargs:'")
-    @model_validator(mode="before")
-    @classmethod
-    def validate_metric(cls, values):
-        """Ensures metric is either a string or a single-item dictionary."""
-        metric = values.get("metric")
-        if isinstance(metric, dict):
-            if len(metric) != 1:
-                raise ValueError("Only one metric is allowed in the configuration.")
-            _, value = next(iter(metric.items()))
-            if not isinstance(value, dict):
-                raise ValueError("Metric value must be a RagasMetricConfig object.")
-        elif not isinstance(metric, str):
-            raise ValueError("Metric must be either a string or a single-item dictionary.")
-        return values
-    @property
-    def metric_name(self) -> str:
-        """Returns the single metric name."""
-        if isinstance(self.metric, str):
-            return self.metric
-        if isinstance(self.metric, dict) and self.metric:
-            return next(iter(self.metric.keys()))  # pylint: disable=no-member
-        return ""
-    @property
-    def metric_config(self) -> RagasMetricConfig:
-        """Returns the metric configuration (or a default if only a string is provided)."""
-        if isinstance(self.metric, str):
-            return RagasMetricConfig()  # Default config when only a metric name is given
-        if isinstance(self.metric, dict) and self.metric:
-            return next(iter(self.metric.values()))  # pylint: disable=no-member
-        return RagasMetricConfig()  # Default config when an invalid type is provided
-@register_evaluator(config_type=RagasEvaluatorConfig)
-async def register_ragas_evaluator(config: RagasEvaluatorConfig, builder: EvalBuilder):
-    from ragas.metrics import Metric
-    def get_ragas_metric(metric_name: str) -> Metric | None:
-        """
-        Fetch callable for RAGAS metrics
-        """
-        try:
-            import ragas.metrics as ragas_metrics
-            return getattr(ragas_metrics, metric_name)
-        except ImportError as e:
-            message = f"Ragas metrics not found {e}."
-            logger.error(message)
-            raise ValueError(message) from e
-        except AttributeError as e:
-            message = f"Ragas metric {metric_name} not found {e}."
-            logger.error(message)
-            return None
-    async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
-        '''Run the RAGAS evaluation and return the average scores and evaluation results dataframe'''
-        if not _evaluator:
-            logger.warning("No evaluator found for RAGAS metrics.")
-            # return empty results if no evaluator is found
-            return EvalOutput(average_score=0.0, eval_output_items=[])
-        return await _evaluator.evaluate(eval_input)
-    from .evaluate import RAGEvaluator
-    # Get LLM
-    llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
-    # Get RAGAS metric callable from the metric config and create a list of metric-callables
-    metrics = []
-    # currently only one metric is supported
-    metric_name = config.metric_name  # Extracts the metric name
-    metric_config = config.metric_config  # Extracts the config (handles str/dict cases)
-    # Skip if `skip` is True
-    if not metric_config.skip:
-        metric_callable = get_ragas_metric(metric_name)
-        if metric_callable:
-            kwargs = metric_config.kwargs or {}
-            metrics.append(metric_callable(**kwargs))
-    # Create the RAG evaluator
-    _evaluator = RAGEvaluator(evaluator_llm=llm, metrics=metrics) if metrics else None
-    yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Evaluator for RAGAS metrics")

aiq/eval/register.py DELETED Viewed

@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# flake8: noqa
-# pylint: disable=unused-import
-# Import evaluators which need to be automatically registered here
-from .rag_evaluator.register import register_ragas_evaluator
-from .swe_bench_evaluator.register import register_swe_bench_evaluator
-from .trajectory_evaluator.register import register_trajectory_evaluator
-from .tunable_rag_evaluator.register import register_tunable_rag_evaluator

aiq/eval/remote_workflow.py DELETED Viewed

@@ -1,128 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import asyncio
-import json
-import logging
-import aiohttp
-from pydantic import ValidationError
-from tqdm import tqdm
-from aiq.data_models.api_server import AIQResponseIntermediateStep
-from aiq.data_models.intermediate_step import IntermediateStep
-from aiq.data_models.intermediate_step import IntermediateStepPayload
-from aiq.eval.config import EvaluationRunConfig
-from aiq.eval.evaluator.evaluator_model import EvalInput
-from aiq.eval.evaluator.evaluator_model import EvalInputItem
-logger = logging.getLogger(__name__)
-# Constants for streaming response prefixes
-DATA_PREFIX = "data: "
-INTERMEDIATE_DATA_PREFIX = "intermediate_data: "
-class EvaluationRemoteWorkflowHandler:
-    def __init__(self, config: EvaluationRunConfig, max_concurrency: int):
-        self.config = config
-        # Run metadata
-        self.semaphore = asyncio.Semaphore(max_concurrency)
-    async def run_workflow_remote_single(self, session: aiohttp.ClientSession, item: EvalInputItem):
-        """
-        Sends a single input to the endpoint hosting the workflow and retrieves the response.
-        """
-        question = item.input_obj
-        # generate request format
-        payload = {"input_message": question}
-        try:
-            # Use the streaming endpoint
-            endpoint = f"{self.config.endpoint}/generate/full"
-            async with session.post(endpoint, json=payload) as response:
-                response.raise_for_status()  # Raise an exception for HTTP errors
-                # Initialize variables to store the response
-                final_response = None
-                intermediate_steps = []
-                # Process the streaming response
-                async for line in response.content:
-                    line = line.decode('utf-8').strip()
-                    if not line:
-                        continue
-                    if line.startswith(DATA_PREFIX):
-                        # This is a generate response chunk
-                        try:
-                            chunk_data = json.loads(line[len(DATA_PREFIX):])
-                            if chunk_data.get("value"):
-                                final_response = chunk_data.get("value")
-                        except json.JSONDecodeError as e:
-                            logger.error("Failed to parse generate response chunk: %s", e)
-                            continue
-                    elif line.startswith(INTERMEDIATE_DATA_PREFIX):
-                        # This is an intermediate step
-                        try:
-                            step_data = json.loads(line[len(INTERMEDIATE_DATA_PREFIX):])
-                            response_intermediate = AIQResponseIntermediateStep.model_validate(step_data)
-                            # The payload is expected to be IntermediateStepPayload
-                            intermediate_step = IntermediateStep(
-                                payload=IntermediateStepPayload.model_validate_json(response_intermediate.payload))
-                            intermediate_steps.append(intermediate_step)
-                        except (json.JSONDecodeError, ValidationError) as e:
-                            logger.error("Failed to parse intermediate step: %s", e)
-                            continue
-        except aiohttp.ClientError as e:
-            # Handle connection or HTTP-related errors
-            logger.error("Request failed for question %s: %s", question, e)
-            item.output_obj = None
-            item.trajectory = []
-            return
-        # Extract and fill the item with the response and intermediate steps
-        item.output_obj = final_response
-        item.trajectory = intermediate_steps
-        return
-    async def run_workflow_remote_with_limits(self, session: aiohttp.ClientSession, item: EvalInputItem, pbar: tqdm):
-        """
-        Sends limited number of concurrent requests to a remote workflow and retrieves responses.
-        """
-        async with self.semaphore:
-            await self.run_workflow_remote_single(session=session, item=item)
-            pbar.update(1)
-    async def run_workflow_remote(self, eval_input: EvalInput) -> EvalInput:
-        """
-        Sends inputs to a workflow hosted on a remote endpoint.
-        """
-        timeout = aiohttp.ClientTimeout(total=self.config.endpoint_timeout)
-        try:
-            pbar = tqdm(total=len(eval_input.eval_input_items), desc="Running workflow", unit="item")
-            async with aiohttp.ClientSession(timeout=timeout) as session:
-                # get the questions from the eval_input
-                tasks = [
-                    self.run_workflow_remote_with_limits(session, item, pbar) for item in eval_input.eval_input_items
-                ]
-                await asyncio.gather(*tasks)
-        finally:
-            pbar.close()
-        return eval_input

aiq/eval/runtime_event_subscriber.py DELETED Viewed

@@ -1,52 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import asyncio
-import logging
-from aiq.builder.context import AIQContext
-from aiq.data_models.intermediate_step import IntermediateStep
-logger = logging.getLogger(__name__)
-def pull_intermediate() -> asyncio.Future[list[dict]]:
-    """
-    Subscribes to the runner's event stream using callbacks.
-    Intermediate steps are collected and, when complete, the future is set
-    with the list of dumped intermediate steps.
-    """
-    future = asyncio.Future()
-    intermediate_steps = []  # We'll store the dumped steps here.
-    context = AIQContext.get()
-    def on_next_cb(item: IntermediateStep):
-        # Append each new intermediate step (dumped to dict) to the list.
-        intermediate_steps.append(item.model_dump())
-    def on_error_cb(exc: Exception):
-        logger.error("Hit on_error: %s", exc)
-        if not future.done():
-            future.set_exception(exc)
-    def on_complete_cb():
-        logger.debug("Completed reading intermediate steps")
-        if not future.done():
-            future.set_result(intermediate_steps)
-    # Subscribe with our callbacks.
-    context.intermediate_step_manager.subscribe(on_next=on_next_cb, on_error=on_error_cb, on_complete=on_complete_cb)
-    return future

aiq/eval/swe_bench_evaluator/__init__.py DELETED Viewed

File without changes

aiqtoolkit 1.1.0rc5__py3-none-any.whl → 1.2.0__py3-none-any.whl

Potentially problematic release.

aiqtoolkit 1.1.0rc5__py3-none-any.whl → 1.2.0__py3-none-any.whl