nvidia_nat-1.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +66 -0
- nat/agent/__init__.py +0 -0
- nat/agent/base.py +256 -0
- nat/agent/dual_node.py +67 -0
- nat/agent/react_agent/__init__.py +0 -0
- nat/agent/react_agent/agent.py +363 -0
- nat/agent/react_agent/output_parser.py +104 -0
- nat/agent/react_agent/prompt.py +44 -0
- nat/agent/react_agent/register.py +149 -0
- nat/agent/reasoning_agent/__init__.py +0 -0
- nat/agent/reasoning_agent/reasoning_agent.py +225 -0
- nat/agent/register.py +23 -0
- nat/agent/rewoo_agent/__init__.py +0 -0
- nat/agent/rewoo_agent/agent.py +415 -0
- nat/agent/rewoo_agent/prompt.py +110 -0
- nat/agent/rewoo_agent/register.py +157 -0
- nat/agent/tool_calling_agent/__init__.py +0 -0
- nat/agent/tool_calling_agent/agent.py +119 -0
- nat/agent/tool_calling_agent/register.py +106 -0
- nat/authentication/__init__.py +14 -0
- nat/authentication/api_key/__init__.py +14 -0
- nat/authentication/api_key/api_key_auth_provider.py +96 -0
- nat/authentication/api_key/api_key_auth_provider_config.py +124 -0
- nat/authentication/api_key/register.py +26 -0
- nat/authentication/exceptions/__init__.py +14 -0
- nat/authentication/exceptions/api_key_exceptions.py +38 -0
- nat/authentication/http_basic_auth/__init__.py +0 -0
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- nat/authentication/http_basic_auth/register.py +30 -0
- nat/authentication/interfaces.py +93 -0
- nat/authentication/oauth2/__init__.py +14 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +107 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- nat/authentication/oauth2/register.py +25 -0
- nat/authentication/register.py +21 -0
- nat/builder/__init__.py +0 -0
- nat/builder/builder.py +285 -0
- nat/builder/component_utils.py +316 -0
- nat/builder/context.py +270 -0
- nat/builder/embedder.py +24 -0
- nat/builder/eval_builder.py +161 -0
- nat/builder/evaluator.py +29 -0
- nat/builder/framework_enum.py +24 -0
- nat/builder/front_end.py +73 -0
- nat/builder/function.py +344 -0
- nat/builder/function_base.py +380 -0
- nat/builder/function_info.py +627 -0
- nat/builder/intermediate_step_manager.py +174 -0
- nat/builder/llm.py +25 -0
- nat/builder/retriever.py +25 -0
- nat/builder/user_interaction_manager.py +78 -0
- nat/builder/workflow.py +148 -0
- nat/builder/workflow_builder.py +1117 -0
- nat/cli/__init__.py +14 -0
- nat/cli/cli_utils/__init__.py +0 -0
- nat/cli/cli_utils/config_override.py +231 -0
- nat/cli/cli_utils/validation.py +37 -0
- nat/cli/commands/__init__.py +0 -0
- nat/cli/commands/configure/__init__.py +0 -0
- nat/cli/commands/configure/channel/__init__.py +0 -0
- nat/cli/commands/configure/channel/add.py +28 -0
- nat/cli/commands/configure/channel/channel.py +34 -0
- nat/cli/commands/configure/channel/remove.py +30 -0
- nat/cli/commands/configure/channel/update.py +30 -0
- nat/cli/commands/configure/configure.py +33 -0
- nat/cli/commands/evaluate.py +139 -0
- nat/cli/commands/info/__init__.py +14 -0
- nat/cli/commands/info/info.py +37 -0
- nat/cli/commands/info/list_channels.py +32 -0
- nat/cli/commands/info/list_components.py +129 -0
- nat/cli/commands/info/list_mcp.py +304 -0
- nat/cli/commands/registry/__init__.py +14 -0
- nat/cli/commands/registry/publish.py +88 -0
- nat/cli/commands/registry/pull.py +118 -0
- nat/cli/commands/registry/registry.py +36 -0
- nat/cli/commands/registry/remove.py +108 -0
- nat/cli/commands/registry/search.py +155 -0
- nat/cli/commands/sizing/__init__.py +14 -0
- nat/cli/commands/sizing/calc.py +297 -0
- nat/cli/commands/sizing/sizing.py +27 -0
- nat/cli/commands/start.py +246 -0
- nat/cli/commands/uninstall.py +81 -0
- nat/cli/commands/validate.py +47 -0
- nat/cli/commands/workflow/__init__.py +14 -0
- nat/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- nat/cli/commands/workflow/templates/config.yml.j2 +16 -0
- nat/cli/commands/workflow/templates/pyproject.toml.j2 +22 -0
- nat/cli/commands/workflow/templates/register.py.j2 +5 -0
- nat/cli/commands/workflow/templates/workflow.py.j2 +36 -0
- nat/cli/commands/workflow/workflow.py +37 -0
- nat/cli/commands/workflow/workflow_commands.py +317 -0
- nat/cli/entrypoint.py +135 -0
- nat/cli/main.py +57 -0
- nat/cli/register_workflow.py +488 -0
- nat/cli/type_registry.py +1000 -0
- nat/data_models/__init__.py +14 -0
- nat/data_models/api_server.py +716 -0
- nat/data_models/authentication.py +231 -0
- nat/data_models/common.py +171 -0
- nat/data_models/component.py +58 -0
- nat/data_models/component_ref.py +168 -0
- nat/data_models/config.py +410 -0
- nat/data_models/dataset_handler.py +169 -0
- nat/data_models/discovery_metadata.py +305 -0
- nat/data_models/embedder.py +27 -0
- nat/data_models/evaluate.py +127 -0
- nat/data_models/evaluator.py +26 -0
- nat/data_models/front_end.py +26 -0
- nat/data_models/function.py +30 -0
- nat/data_models/function_dependencies.py +72 -0
- nat/data_models/interactive.py +246 -0
- nat/data_models/intermediate_step.py +302 -0
- nat/data_models/invocation_node.py +38 -0
- nat/data_models/llm.py +27 -0
- nat/data_models/logging.py +26 -0
- nat/data_models/memory.py +27 -0
- nat/data_models/object_store.py +44 -0
- nat/data_models/profiler.py +54 -0
- nat/data_models/registry_handler.py +26 -0
- nat/data_models/retriever.py +30 -0
- nat/data_models/retry_mixin.py +35 -0
- nat/data_models/span.py +190 -0
- nat/data_models/step_adaptor.py +64 -0
- nat/data_models/streaming.py +33 -0
- nat/data_models/swe_bench_model.py +54 -0
- nat/data_models/telemetry_exporter.py +26 -0
- nat/data_models/ttc_strategy.py +30 -0
- nat/embedder/__init__.py +0 -0
- nat/embedder/nim_embedder.py +59 -0
- nat/embedder/openai_embedder.py +43 -0
- nat/embedder/register.py +22 -0
- nat/eval/__init__.py +14 -0
- nat/eval/config.py +60 -0
- nat/eval/dataset_handler/__init__.py +0 -0
- nat/eval/dataset_handler/dataset_downloader.py +106 -0
- nat/eval/dataset_handler/dataset_filter.py +52 -0
- nat/eval/dataset_handler/dataset_handler.py +367 -0
- nat/eval/evaluate.py +510 -0
- nat/eval/evaluator/__init__.py +14 -0
- nat/eval/evaluator/base_evaluator.py +77 -0
- nat/eval/evaluator/evaluator_model.py +45 -0
- nat/eval/intermediate_step_adapter.py +99 -0
- nat/eval/rag_evaluator/__init__.py +0 -0
- nat/eval/rag_evaluator/evaluate.py +178 -0
- nat/eval/rag_evaluator/register.py +143 -0
- nat/eval/register.py +23 -0
- nat/eval/remote_workflow.py +133 -0
- nat/eval/runners/__init__.py +14 -0
- nat/eval/runners/config.py +39 -0
- nat/eval/runners/multi_eval_runner.py +54 -0
- nat/eval/runtime_event_subscriber.py +52 -0
- nat/eval/swe_bench_evaluator/__init__.py +0 -0
- nat/eval/swe_bench_evaluator/evaluate.py +215 -0
- nat/eval/swe_bench_evaluator/register.py +36 -0
- nat/eval/trajectory_evaluator/__init__.py +0 -0
- nat/eval/trajectory_evaluator/evaluate.py +75 -0
- nat/eval/trajectory_evaluator/register.py +40 -0
- nat/eval/tunable_rag_evaluator/__init__.py +0 -0
- nat/eval/tunable_rag_evaluator/evaluate.py +245 -0
- nat/eval/tunable_rag_evaluator/register.py +52 -0
- nat/eval/usage_stats.py +41 -0
- nat/eval/utils/__init__.py +0 -0
- nat/eval/utils/output_uploader.py +140 -0
- nat/eval/utils/tqdm_position_registry.py +40 -0
- nat/eval/utils/weave_eval.py +184 -0
- nat/experimental/__init__.py +0 -0
- nat/experimental/decorators/__init__.py +0 -0
- nat/experimental/decorators/experimental_warning_decorator.py +134 -0
- nat/experimental/test_time_compute/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
- nat/experimental/test_time_compute/functions/__init__.py +0 -0
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +224 -0
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +205 -0
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +146 -0
- nat/experimental/test_time_compute/models/__init__.py +0 -0
- nat/experimental/test_time_compute/models/editor_config.py +132 -0
- nat/experimental/test_time_compute/models/scoring_config.py +112 -0
- nat/experimental/test_time_compute/models/search_config.py +120 -0
- nat/experimental/test_time_compute/models/selection_config.py +154 -0
- nat/experimental/test_time_compute/models/stage_enums.py +43 -0
- nat/experimental/test_time_compute/models/strategy_base.py +66 -0
- nat/experimental/test_time_compute/models/tool_use_config.py +41 -0
- nat/experimental/test_time_compute/models/ttc_item.py +48 -0
- nat/experimental/test_time_compute/register.py +36 -0
- nat/experimental/test_time_compute/scoring/__init__.py +0 -0
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
- nat/experimental/test_time_compute/search/__init__.py +0 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
- nat/experimental/test_time_compute/selection/__init__.py +0 -0
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +159 -0
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +58 -0
- nat/front_ends/__init__.py +14 -0
- nat/front_ends/console/__init__.py +14 -0
- nat/front_ends/console/authentication_flow_handler.py +233 -0
- nat/front_ends/console/console_front_end_config.py +32 -0
- nat/front_ends/console/console_front_end_plugin.py +96 -0
- nat/front_ends/console/register.py +25 -0
- nat/front_ends/cron/__init__.py +14 -0
- nat/front_ends/fastapi/__init__.py +14 -0
- nat/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +107 -0
- nat/front_ends/fastapi/fastapi_front_end_config.py +241 -0
- nat/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +116 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1087 -0
- nat/front_ends/fastapi/html_snippets/__init__.py +14 -0
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
- nat/front_ends/fastapi/job_store.py +183 -0
- nat/front_ends/fastapi/main.py +72 -0
- nat/front_ends/fastapi/message_handler.py +320 -0
- nat/front_ends/fastapi/message_validator.py +352 -0
- nat/front_ends/fastapi/register.py +25 -0
- nat/front_ends/fastapi/response_helpers.py +195 -0
- nat/front_ends/fastapi/step_adaptor.py +319 -0
- nat/front_ends/mcp/__init__.py +14 -0
- nat/front_ends/mcp/mcp_front_end_config.py +36 -0
- nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
- nat/front_ends/mcp/register.py +27 -0
- nat/front_ends/mcp/tool_converter.py +241 -0
- nat/front_ends/register.py +22 -0
- nat/front_ends/simple_base/__init__.py +14 -0
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +54 -0
- nat/llm/__init__.py +0 -0
- nat/llm/aws_bedrock_llm.py +57 -0
- nat/llm/nim_llm.py +46 -0
- nat/llm/openai_llm.py +46 -0
- nat/llm/register.py +23 -0
- nat/llm/utils/__init__.py +14 -0
- nat/llm/utils/env_config_value.py +94 -0
- nat/llm/utils/error.py +17 -0
- nat/memory/__init__.py +20 -0
- nat/memory/interfaces.py +183 -0
- nat/memory/models.py +112 -0
- nat/meta/pypi.md +58 -0
- nat/object_store/__init__.py +20 -0
- nat/object_store/in_memory_object_store.py +76 -0
- nat/object_store/interfaces.py +84 -0
- nat/object_store/models.py +38 -0
- nat/object_store/register.py +20 -0
- nat/observability/__init__.py +14 -0
- nat/observability/exporter/__init__.py +14 -0
- nat/observability/exporter/base_exporter.py +449 -0
- nat/observability/exporter/exporter.py +78 -0
- nat/observability/exporter/file_exporter.py +33 -0
- nat/observability/exporter/processing_exporter.py +322 -0
- nat/observability/exporter/raw_exporter.py +52 -0
- nat/observability/exporter/span_exporter.py +288 -0
- nat/observability/exporter_manager.py +335 -0
- nat/observability/mixin/__init__.py +14 -0
- nat/observability/mixin/batch_config_mixin.py +26 -0
- nat/observability/mixin/collector_config_mixin.py +23 -0
- nat/observability/mixin/file_mixin.py +288 -0
- nat/observability/mixin/file_mode.py +23 -0
- nat/observability/mixin/resource_conflict_mixin.py +134 -0
- nat/observability/mixin/serialize_mixin.py +61 -0
- nat/observability/mixin/type_introspection_mixin.py +183 -0
- nat/observability/processor/__init__.py +14 -0
- nat/observability/processor/batching_processor.py +310 -0
- nat/observability/processor/callback_processor.py +42 -0
- nat/observability/processor/intermediate_step_serializer.py +28 -0
- nat/observability/processor/processor.py +71 -0
- nat/observability/register.py +96 -0
- nat/observability/utils/__init__.py +14 -0
- nat/observability/utils/dict_utils.py +236 -0
- nat/observability/utils/time_utils.py +31 -0
- nat/plugins/.namespace +1 -0
- nat/profiler/__init__.py +0 -0
- nat/profiler/calc/__init__.py +14 -0
- nat/profiler/calc/calc_runner.py +627 -0
- nat/profiler/calc/calculations.py +288 -0
- nat/profiler/calc/data_models.py +188 -0
- nat/profiler/calc/plot.py +345 -0
- nat/profiler/callbacks/__init__.py +0 -0
- nat/profiler/callbacks/agno_callback_handler.py +295 -0
- nat/profiler/callbacks/base_callback_class.py +20 -0
- nat/profiler/callbacks/langchain_callback_handler.py +290 -0
- nat/profiler/callbacks/llama_index_callback_handler.py +205 -0
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
- nat/profiler/callbacks/token_usage_base_model.py +27 -0
- nat/profiler/data_frame_row.py +51 -0
- nat/profiler/data_models.py +24 -0
- nat/profiler/decorators/__init__.py +0 -0
- nat/profiler/decorators/framework_wrapper.py +131 -0
- nat/profiler/decorators/function_tracking.py +254 -0
- nat/profiler/forecasting/__init__.py +0 -0
- nat/profiler/forecasting/config.py +18 -0
- nat/profiler/forecasting/model_trainer.py +75 -0
- nat/profiler/forecasting/models/__init__.py +22 -0
- nat/profiler/forecasting/models/forecasting_base_model.py +40 -0
- nat/profiler/forecasting/models/linear_model.py +197 -0
- nat/profiler/forecasting/models/random_forest_regressor.py +269 -0
- nat/profiler/inference_metrics_model.py +28 -0
- nat/profiler/inference_optimization/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
- nat/profiler/inference_optimization/data_models.py +386 -0
- nat/profiler/inference_optimization/experimental/__init__.py +0 -0
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +405 -0
- nat/profiler/inference_optimization/llm_metrics.py +212 -0
- nat/profiler/inference_optimization/prompt_caching.py +163 -0
- nat/profiler/inference_optimization/token_uniqueness.py +107 -0
- nat/profiler/inference_optimization/workflow_runtimes.py +72 -0
- nat/profiler/intermediate_property_adapter.py +102 -0
- nat/profiler/profile_runner.py +473 -0
- nat/profiler/utils.py +184 -0
- nat/registry_handlers/__init__.py +0 -0
- nat/registry_handlers/local/__init__.py +0 -0
- nat/registry_handlers/local/local_handler.py +176 -0
- nat/registry_handlers/local/register_local.py +37 -0
- nat/registry_handlers/metadata_factory.py +60 -0
- nat/registry_handlers/package_utils.py +571 -0
- nat/registry_handlers/pypi/__init__.py +0 -0
- nat/registry_handlers/pypi/pypi_handler.py +251 -0
- nat/registry_handlers/pypi/register_pypi.py +40 -0
- nat/registry_handlers/register.py +21 -0
- nat/registry_handlers/registry_handler_base.py +157 -0
- nat/registry_handlers/rest/__init__.py +0 -0
- nat/registry_handlers/rest/register_rest.py +56 -0
- nat/registry_handlers/rest/rest_handler.py +237 -0
- nat/registry_handlers/schemas/__init__.py +0 -0
- nat/registry_handlers/schemas/headers.py +42 -0
- nat/registry_handlers/schemas/package.py +68 -0
- nat/registry_handlers/schemas/publish.py +68 -0
- nat/registry_handlers/schemas/pull.py +82 -0
- nat/registry_handlers/schemas/remove.py +36 -0
- nat/registry_handlers/schemas/search.py +91 -0
- nat/registry_handlers/schemas/status.py +47 -0
- nat/retriever/__init__.py +0 -0
- nat/retriever/interface.py +41 -0
- nat/retriever/milvus/__init__.py +14 -0
- nat/retriever/milvus/register.py +81 -0
- nat/retriever/milvus/retriever.py +228 -0
- nat/retriever/models.py +77 -0
- nat/retriever/nemo_retriever/__init__.py +14 -0
- nat/retriever/nemo_retriever/register.py +60 -0
- nat/retriever/nemo_retriever/retriever.py +190 -0
- nat/retriever/register.py +22 -0
- nat/runtime/__init__.py +14 -0
- nat/runtime/loader.py +220 -0
- nat/runtime/runner.py +195 -0
- nat/runtime/session.py +162 -0
- nat/runtime/user_metadata.py +130 -0
- nat/settings/__init__.py +0 -0
- nat/settings/global_settings.py +318 -0
- nat/test/.namespace +1 -0
- nat/tool/__init__.py +0 -0
- nat/tool/chat_completion.py +74 -0
- nat/tool/code_execution/README.md +151 -0
- nat/tool/code_execution/__init__.py +0 -0
- nat/tool/code_execution/code_sandbox.py +267 -0
- nat/tool/code_execution/local_sandbox/.gitignore +1 -0
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
- nat/tool/code_execution/local_sandbox/__init__.py +13 -0
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
- nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
- nat/tool/code_execution/register.py +74 -0
- nat/tool/code_execution/test_code_execution_sandbox.py +414 -0
- nat/tool/code_execution/utils.py +100 -0
- nat/tool/datetime_tools.py +42 -0
- nat/tool/document_search.py +141 -0
- nat/tool/github_tools/__init__.py +0 -0
- nat/tool/github_tools/create_github_commit.py +133 -0
- nat/tool/github_tools/create_github_issue.py +87 -0
- nat/tool/github_tools/create_github_pr.py +106 -0
- nat/tool/github_tools/get_github_file.py +106 -0
- nat/tool/github_tools/get_github_issue.py +166 -0
- nat/tool/github_tools/get_github_pr.py +256 -0
- nat/tool/github_tools/update_github_issue.py +100 -0
- nat/tool/mcp/__init__.py +14 -0
- nat/tool/mcp/exceptions.py +142 -0
- nat/tool/mcp/mcp_client.py +255 -0
- nat/tool/mcp/mcp_tool.py +96 -0
- nat/tool/memory_tools/__init__.py +0 -0
- nat/tool/memory_tools/add_memory_tool.py +79 -0
- nat/tool/memory_tools/delete_memory_tool.py +67 -0
- nat/tool/memory_tools/get_memory_tool.py +72 -0
- nat/tool/nvidia_rag.py +95 -0
- nat/tool/register.py +38 -0
- nat/tool/retriever.py +94 -0
- nat/tool/server_tools.py +66 -0
- nat/utils/__init__.py +0 -0
- nat/utils/data_models/__init__.py +0 -0
- nat/utils/data_models/schema_validator.py +58 -0
- nat/utils/debugging_utils.py +43 -0
- nat/utils/dump_distro_mapping.py +32 -0
- nat/utils/exception_handlers/__init__.py +0 -0
- nat/utils/exception_handlers/automatic_retries.py +289 -0
- nat/utils/exception_handlers/mcp.py +211 -0
- nat/utils/exception_handlers/schemas.py +114 -0
- nat/utils/io/__init__.py +0 -0
- nat/utils/io/model_processing.py +28 -0
- nat/utils/io/yaml_tools.py +119 -0
- nat/utils/log_utils.py +37 -0
- nat/utils/metadata_utils.py +74 -0
- nat/utils/optional_imports.py +142 -0
- nat/utils/producer_consumer_queue.py +178 -0
- nat/utils/reactive/__init__.py +0 -0
- nat/utils/reactive/base/__init__.py +0 -0
- nat/utils/reactive/base/observable_base.py +65 -0
- nat/utils/reactive/base/observer_base.py +55 -0
- nat/utils/reactive/base/subject_base.py +79 -0
- nat/utils/reactive/observable.py +59 -0
- nat/utils/reactive/observer.py +76 -0
- nat/utils/reactive/subject.py +131 -0
- nat/utils/reactive/subscription.py +49 -0
- nat/utils/settings/__init__.py +0 -0
- nat/utils/settings/global_settings.py +197 -0
- nat/utils/string_utils.py +38 -0
- nat/utils/type_converter.py +290 -0
- nat/utils/type_utils.py +484 -0
- nat/utils/url_utils.py +27 -0
- nvidia_nat-1.2.0.dist-info/METADATA +365 -0
- nvidia_nat-1.2.0.dist-info/RECORD +435 -0
- nvidia_nat-1.2.0.dist-info/WHEEL +5 -0
- nvidia_nat-1.2.0.dist-info/entry_points.txt +21 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
- nvidia_nat-1.2.0.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat-1.2.0.dist-info/top_level.txt +2 -0
nat/embedder/openai_embedder.py
ADDED
@@ -0,0 +1,43 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pydantic import AliasChoices
from pydantic import ConfigDict
from pydantic import Field

from nat.builder.builder import Builder
from nat.builder.embedder import EmbedderProviderInfo
from nat.cli.register_workflow import register_embedder_provider
from nat.data_models.embedder import EmbedderBaseConfig
from nat.data_models.retry_mixin import RetryMixin


class OpenAIEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="openai"):
    """An OpenAI embedder provider to be used with an embedder client."""

    model_config = ConfigDict(protected_namespaces=())

    api_key: str | None = Field(default=None, description="OpenAI API key to interact with hosted model.")
    base_url: str | None = Field(default=None, description="Base url to the hosted model.")
    model_name: str = Field(validation_alias=AliasChoices("model_name", "model"),
                            serialization_alias="model",
                            description="The OpenAI hosted model name.")
    max_retries: int = Field(default=2, description="The max number of retries for the request.")


@register_embedder_provider(config_type=OpenAIEmbedderModelConfig)
async def openai_llm(config: OpenAIEmbedderModelConfig, builder: Builder):

    yield EmbedderProviderInfo(config=config, description="An OpenAI model for use with an Embedder client.")
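One detail worth noting above: `model_name` declares `validation_alias=AliasChoices("model_name", "model")` and `serialization_alias="model"`, so either key is accepted on input while the OpenAI-style `model` key is emitted on output. A round-trip sketch, assuming only that the package is importable:

    from nat.embedder.openai_embedder import OpenAIEmbedderModelConfig

    # Either "model_name" or "model" validates into the same field.
    cfg = OpenAIEmbedderModelConfig(model="text-embedding-3-small")
    assert cfg.model_name == "text-embedding-3-small"

    # by_alias=True serializes the field back out under "model".
    print(cfg.model_dump(by_alias=True))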
nat/embedder/register.py
ADDED
@@ -0,0 +1,22 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=unused-import
# flake8: noqa
# isort:skip_file

# Import any providers which need to be automatically registered here
from . import nim_embedder
from . import openai_embedder
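This module's only job is its import side effects: importing `nim_embedder` and `openai_embedder` runs their module-level `@register_embedder_provider` decorators. A toy sketch of that import-time registration pattern (the names here are illustrative, not the toolkit's internals):

    # Toy illustration of decorator-based auto-registration; not the nat implementation.
    REGISTRY: dict[str, type] = {}

    def register_provider(name: str):
        def wrap(cls: type) -> type:
            REGISTRY[name] = cls  # executes when the defining module is imported
            return cls
        return wrap

    @register_provider("openai")
    class OpenAIEmbedder:
        pass

    print(REGISTRY)  # {'openai': <class '__main__.OpenAIEmbedder'>}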
nat/eval/__init__.py
ADDED
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
nat/eval/config.py
ADDED
@@ -0,0 +1,60 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path

from pydantic import BaseModel

from nat.eval.evaluator.evaluator_model import EvalInput
from nat.eval.evaluator.evaluator_model import EvalOutput
from nat.eval.usage_stats import UsageStats
from nat.profiler.data_models import ProfilerResults


class EvaluationRunConfig(BaseModel):
    """
    Parameters used for a single evaluation run.
    """
    config_file: Path
    dataset: str | None = None  # dataset file path can be specified in the config file
    result_json_path: str = "$"
    skip_workflow: bool = False
    skip_completed_entries: bool = False
    endpoint: str | None = None  # only used when running the workflow remotely
    endpoint_timeout: int = 300
    reps: int = 1
    override: tuple[tuple[str, str], ...] = ()
    # If false, the output will not be written to the output directory. This is
    # useful when running evaluation via another tool.
    write_output: bool = True
    # if true, the dataset is adjusted to a multiple of the concurrency
    adjust_dataset_size: bool = False
    # number of passes at each concurrency; if 0, the dataset is adjusted to a multiple of the
    # concurrency. This is only used if adjust_dataset_size is true
    num_passes: int = 0


class EvaluationRunOutput(BaseModel):
    """
    Output of a single evaluation run.
    """
    workflow_output_file: Path | None
    evaluator_output_files: list[Path]
    workflow_interrupted: bool

    eval_input: EvalInput
    evaluation_results: list[tuple[str, EvalOutput]]
    usage_stats: UsageStats | None = None
    profiler_results: ProfilerResults
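A minimal construction sketch for these models, assuming only what the field definitions above imply (the paths and the override key are made up for illustration):

    from pathlib import Path

    from nat.eval.config import EvaluationRunConfig

    # Hypothetical values; config_file is the only required field.
    run_config = EvaluationRunConfig(
        config_file=Path("configs/eval_config.yml"),
        dataset="data/eval_set.json",  # optional override of the dataset in the config file
        reps=2,
        override=(("eval.general.max_concurrency", "4"),),  # assumed (key, value) string pairs
    )
    print(run_config.model_dump())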
nat/eval/dataset_handler/__init__.py
File without changes

nat/eval/dataset_handler/dataset_downloader.py
ADDED
@@ -0,0 +1,106 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from pathlib import Path

import boto3
import requests
from botocore.exceptions import NoCredentialsError

from nat.data_models.dataset_handler import EvalDatasetConfig

logger = logging.getLogger(__name__)


class DatasetDownloader:
    """
    Download remote datasets using signed URLs or S3 credentials.

    One DatasetDownloader object is needed for each dataset to be downloaded.
    """

    def __init__(self, dataset_config: EvalDatasetConfig):
        self.dataset_config = dataset_config
        self._s3_client = None

    @property
    def s3_config(self):
        return self.dataset_config.s3

    @property
    def s3_client(self):
        """Lazy init the S3 client."""
        if not self._s3_client:
            try:
                self._s3_client = boto3.client("s3",
                                               endpoint_url=self.s3_config.endpoint_url,
                                               aws_access_key_id=self.s3_config.access_key,
                                               aws_secret_access_key=self.s3_config.secret_key)
            except NoCredentialsError as e:
                logger.error("AWS credentials not available: %s", e)
                raise
            except Exception as e:
                logger.error("Failed to initialize S3 client: %s", e)
                raise
        return self._s3_client

    @staticmethod
    def ensure_directory_exists(file_path: str):
        """Ensure the directory for the file exists."""
        Path(file_path).parent.mkdir(parents=True, exist_ok=True)

    def download_with_signed_url(self, remote_file_path: str, local_file_path: str, timeout: int = 300):
        """Download a file using a signed URL."""
        try:
            response = requests.get(remote_file_path, stream=True, timeout=timeout)
            response.raise_for_status()
            with open(local_file_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
            logger.info("File downloaded successfully to %s using signed URL.", local_file_path)
        except requests.exceptions.RequestException as e:
            logger.error("Error downloading file using signed URL: %s", e)
            raise

    def download_with_boto3(self, remote_file_path: str, local_file_path: str):
        """Download a file using boto3 and credentials."""
        try:
            self.s3_client.download_file(self.dataset_config.s3.bucket, remote_file_path, local_file_path)
            logger.info("File downloaded successfully to %s using S3 client.", local_file_path)
        except Exception as e:
            logger.error("Error downloading file from S3: %s", e)
            raise

    @staticmethod
    def is_file_path_url(file_path: str) -> bool:
        """Check if the file path is a URL."""
        return file_path.startswith("http")

    def download_file(self, remote_file_path: str, local_file_path: str):
        """Download a file using the appropriate method."""
        self.ensure_directory_exists(local_file_path)
        if self.is_file_path_url(remote_file_path):
            logger.info("Using signed URL to download the file %s...", remote_file_path)
            self.download_with_signed_url(remote_file_path, local_file_path, timeout=120)
        else:
            logger.info("Using S3 credentials to download the file %s...", remote_file_path)
            self.download_with_boto3(remote_file_path, local_file_path)

    def download_dataset(self):
        """Download datasets defined in the evaluation configuration."""
        if self.dataset_config.remote_file_path:
            logger.info("Downloading remote dataset %s", self.dataset_config.remote_file_path)
            self.download_file(remote_file_path=self.dataset_config.remote_file_path,
                               local_file_path=self.dataset_config.file_path)
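A usage sketch: `download_dataset` is a no-op unless `remote_file_path` is set, and a path starting with "http" routes through the signed-URL branch, so no S3 credentials are needed. Field names below are taken from the code above; the URL and paths are placeholders:

    from nat.data_models.dataset_handler import EvalDatasetJsonConfig
    from nat.eval.dataset_handler.dataset_downloader import DatasetDownloader

    # Placeholder values; "http..." selects download_with_signed_url.
    config = EvalDatasetJsonConfig(file_path="local/eval_set.json",
                                   remote_file_path="https://example.com/eval_set.json")

    DatasetDownloader(dataset_config=config).download_dataset()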
nat/eval/dataset_handler/dataset_filter.py
ADDED
@@ -0,0 +1,52 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas as pd

from nat.data_models.dataset_handler import EvalFilterConfig


class DatasetFilter:
    """
    Apply allowlist and denylist filters to the DataFrame based on specified column filters.
    - If an allowlist is provided, only keep rows matching the filter values.
    - If a denylist is provided, remove rows matching the filter values.
    - If the filter column does not exist in the DataFrame, the filtering is skipped for that column.

    This is a utility class that is dataset agnostic and can be used to filter any DataFrame based on the provided
    filter configuration.
    """

    def __init__(self, filter_config: EvalFilterConfig):

        self.filter_config = filter_config

    def apply_filters(self, df) -> pd.DataFrame:

        filtered_df = df.copy()

        # Apply allowlist (only keep specified rows)
        if self.filter_config.allowlist:
            for column, values in self.filter_config.allowlist.field.items():
                if column in filtered_df.columns:
                    filtered_df = filtered_df[filtered_df[column].isin(values)]

        # Apply denylist (remove specified rows)
        if self.filter_config.denylist:
            for column, values in self.filter_config.denylist.field.items():
                if column in filtered_df.columns:
                    filtered_df = filtered_df[~filtered_df[column].isin(values)]

        return filtered_df
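The filtering itself is ordinary pandas boolean indexing. A self-contained illustration of the same allowlist/denylist semantics on a toy frame (no nat imports needed):

    import pandas as pd

    df = pd.DataFrame({"id": [1, 2, 3, 4], "category": ["math", "code", "math", "trivia"]})

    allowlist = {"category": ["math", "code"]}  # keep only matching rows
    denylist = {"id": [2]}                      # then drop matching rows

    for column, values in allowlist.items():
        if column in df.columns:
            df = df[df[column].isin(values)]
    for column, values in denylist.items():
        if column in df.columns:
            df = df[~df[column].isin(values)]

    print(df)  # rows with id 1 and 3 remain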
nat/eval/dataset_handler/dataset_handler.py
ADDED
@@ -0,0 +1,367 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import math
from pathlib import Path

import pandas as pd

from nat.data_models.dataset_handler import EvalDatasetConfig
from nat.data_models.dataset_handler import EvalDatasetCustomConfig
from nat.data_models.dataset_handler import EvalDatasetJsonConfig
from nat.data_models.intermediate_step import IntermediateStep
from nat.data_models.intermediate_step import IntermediateStepType
from nat.eval.dataset_handler.dataset_downloader import DatasetDownloader
from nat.eval.dataset_handler.dataset_filter import DatasetFilter
from nat.eval.evaluator.evaluator_model import EvalInput
from nat.eval.evaluator.evaluator_model import EvalInputItem


class DatasetHandler:
    """
    Read the datasets and pre-process (apply filters, deduplicate etc.) before turning them into EvalInput objects.
    One DatasetHandler object is needed for each dataset to be evaluated.
    """

    def __init__(self,
                 dataset_config: EvalDatasetConfig,
                 reps: int,
                 concurrency: int,
                 num_passes: int = 1,
                 adjust_dataset_size: bool = False):
        from nat.eval.intermediate_step_adapter import IntermediateStepAdapter

        self.dataset_config = dataset_config
        self.dataset_filter = DatasetFilter(dataset_config.filter)
        self.reps = reps

        # number of passes at specific concurrency
        self.concurrency = concurrency
        self.num_passes = num_passes
        self.adjust_dataset_size = adjust_dataset_size

        # Helpers
        self.intermediate_step_adapter = IntermediateStepAdapter()

    def is_structured_input(self) -> bool:
        '''Check if the input is structured or unstructured'''
        return not self.dataset_config.structure.disable

    @property
    def id_key(self) -> str:
        return self.dataset_config.id_key

    @property
    def question_key(self) -> str:
        return self.dataset_config.structure.question_key

    @property
    def answer_key(self) -> str:
        return self.dataset_config.structure.answer_key

    @property
    def generated_answer_key(self) -> str:
        return self.dataset_config.structure.generated_answer_key

    @property
    def trajectory_key(self) -> str:
        return self.dataset_config.structure.trajectory_key

    @property
    def expected_trajectory_key(self) -> str:
        return self.dataset_config.structure.expected_trajectory_key

    def get_eval_input_from_df(self, input_df: pd.DataFrame) -> EvalInput:

        def create_eval_item(row: pd.Series, structured: bool) -> EvalInputItem:
            """Helper function to create EvalInputItem."""
            return EvalInputItem(
                id=row.get(self.id_key, ""),
                input_obj=row.to_json() if not structured else row.get(self.question_key, ""),
                expected_output_obj=row.get(self.answer_key, "") if structured else "",
                output_obj=row.get(self.generated_answer_key, "") if structured else "",
                trajectory=row.get(self.trajectory_key, []) if structured else [],
                expected_trajectory=row.get(self.expected_trajectory_key, []) if structured else [],
                full_dataset_entry=row.to_dict(),
            )

        # if input dataframe is empty return an empty list
        if input_df.empty:
            return EvalInput(eval_input_items=[])

        structured = self.is_structured_input()
        if structured:
            # For structured input, question is mandatory. Ignore rows with missing or empty questions
            input_df = input_df[input_df[self.question_key].notnull() & input_df[self.question_key].str.strip().ne("")]
        eval_input_items = [create_eval_item(row, structured) for _, row in input_df.iterrows()]

        return EvalInput(eval_input_items=eval_input_items)

    def setup_reps(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """replicate the rows and update the id to id_key + "_rep" + rep_number"""
        # Replicate the rows
        input_df = pd.concat([input_df] * self.reps, ignore_index=True)
        # Compute repetition index
        rep_index = input_df.groupby(self.dataset_config.id_key).cumcount().astype(str)
        # Convert id_key to string (id can be integer) if needed and update IDs
        input_df[self.dataset_config.id_key] = input_df[self.dataset_config.id_key].astype(str) + "_rep" + rep_index
        # Ensure unique ID values after modification
        input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)

        return input_df

    def adjust_dataset(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """
        Adjust the dataset so its length is a multiple of concurrency.

        If num_passes > 0:
            dataset size is adjusted to concurrency * num_passes
        else:
            dataset size is adjusted to the largest multiple of concurrency
            that is less than or equal to the current dataset size
        """
        if self.concurrency <= 0:
            raise ValueError("Concurrency must be > 0")

        if self.num_passes < 0:
            raise ValueError("num_passes must be >= 0")

        original_size = input_df.shape[0]

        # Calculate target size
        if self.num_passes > 0:
            # When num_passes is specified, always use concurrency * num_passes
            # This respects the user's intent for exact number of passes
            target_size = self.concurrency * self.num_passes
        else:
            # When num_passes = 0, use the largest multiple of concurrency <= original_size
            # If original_size < concurrency, we need at least concurrency rows
            if original_size >= self.concurrency:
                target_size = (original_size // self.concurrency) * self.concurrency
            else:
                target_size = self.concurrency

        if target_size == 0:
            raise ValueError("Input dataset too small for even one batch at given concurrency.")

        id_col = self.dataset_config.id_key

        # If we need more rows than we have, replicate the dataset
        if original_size < target_size:
            # Clean existing _rep suffix if present
            input_df[id_col] = input_df[id_col].astype(str).str.replace(r"_rep\d+$", "", regex=True)

            # Calculate how many complete copies we need
            copies_needed = math.ceil(target_size / original_size)

            # Create the replicated dataframe
            replicated_dfs = []
            for i in range(copies_needed):
                df_copy = input_df.copy()
                if i > 0:  # Add suffix to all but the first copy
                    df_copy[id_col] = df_copy[id_col].astype(str) + f"_rep{i}"
                replicated_dfs.append(df_copy)

            input_df = pd.concat(replicated_dfs, ignore_index=True)

        # Return exactly the target size
        return input_df.head(target_size)

    def get_eval_input_from_dataset(self, dataset: str) -> EvalInput:
        # read the dataset and convert it to EvalInput

        # if a dataset file has been provided in the command line, use that
        dataset_config = EvalDatasetJsonConfig(file_path=dataset) if dataset else self.dataset_config

        # Handle custom dataset type with special processing
        if isinstance(self.dataset_config, EvalDatasetCustomConfig):
            return self._handle_custom_dataset(dataset)

        # Download the dataset if it is remote
        downloader = DatasetDownloader(dataset_config=dataset_config)
        downloader.download_dataset()

        parser, kwargs = dataset_config.parser()
        # Parse the dataset into a DataFrame
        input_df = parser(dataset_config.file_path, **kwargs)

        # Apply standard preprocessing and convert to EvalInput
        return self._preprocess_eval_dataframe(input_df)

    def _preprocess_dataframe(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """
        Apply standard preprocessing to a DataFrame: filters, deduplication, repetitions, and size adjustment.

        Args:
            input_df: DataFrame to preprocess

        Returns:
            Preprocessed DataFrame
        """
        # Apply filters and deduplicate
        input_df = self.dataset_filter.apply_filters(input_df)
        input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)

        if self.reps > 1 and self.adjust_dataset_size:
            raise ValueError("reps and adjust_dataset_size are mutually exclusive")

        # If more than one repetition is needed, replicate the rows
        if self.reps > 1:
            input_df = self.setup_reps(input_df)
        elif self.adjust_dataset_size:
            input_df = self.adjust_dataset(input_df)

        return input_df

    def _preprocess_eval_dataframe(self, input_df: pd.DataFrame) -> EvalInput:
        """
        Apply standard preprocessing to a DataFrame and convert to EvalInput.

        Args:
            input_df: DataFrame to preprocess

        Returns:
            Preprocessed EvalInput object
        """
        processed_df = self._preprocess_dataframe(input_df)
        return self.get_eval_input_from_df(processed_df)

    def _preprocess_eval_input(self, eval_input: EvalInput) -> EvalInput:
        """
        Apply standard preprocessing to an EvalInput object.

        Thin wrapper that converts EvalInput to DataFrame, processes it, and converts back.

        Args:
            eval_input: EvalInput object to preprocess

        Returns:
            Preprocessed EvalInput object
        """
        if not eval_input.eval_input_items:
            return eval_input

        input_df = self._eval_input_to_dataframe(eval_input)
        return self._preprocess_eval_dataframe(input_df)

    def _handle_custom_dataset(self, dataset: str | None) -> EvalInput:
        """
        Handle custom dataset type by calling the user-defined function
        and applying standard preprocessing to the result.

        Args:
            dataset: Optional dataset file path from command line

        Returns:
            Preprocessed EvalInput object
        """
        # Determine input path - use command line dataset or config file_path
        input_path = Path(dataset) if dataset else Path(self.dataset_config.file_path)

        # Download the dataset if it is remote (for custom datasets too)
        downloader = DatasetDownloader(dataset_config=self.dataset_config)
        downloader.download_dataset()

        # Load and call custom function
        custom_function, kwargs = self.dataset_config.parser()

        try:
            # Call the custom function with file_path and kwargs
            eval_input = custom_function(file_path=input_path, **kwargs)

            if not isinstance(eval_input, EvalInput):
                raise ValueError(f"Custom function must return an EvalInput object, "
                                 f"but returned {type(eval_input)}")

        except Exception as e:
            raise RuntimeError(f"Error calling custom dataset function: {e}") from e

        # Apply standard preprocessing (filters, deduplication, repetitions)
        return self._preprocess_eval_input(eval_input)

    def _eval_input_to_dataframe(self, eval_input: EvalInput) -> pd.DataFrame:
        """
        Convert an EvalInput object to a pandas DataFrame for processing.

        Args:
            eval_input: EvalInput object to convert

        Returns:
            DataFrame representation of the EvalInput
        """
        data = []
        for item in eval_input.eval_input_items:
            row = item.full_dataset_entry.copy() if item.full_dataset_entry else {}

            # Ensure key fields are present
            row[self.id_key] = item.id
            if self.is_structured_input():
                row[self.question_key] = item.input_obj
                row[self.answer_key] = item.expected_output_obj
                row[self.generated_answer_key] = item.output_obj
                row[self.trajectory_key] = item.trajectory
                row[self.expected_trajectory_key] = item.expected_trajectory

            data.append(row)

        return pd.DataFrame(data)

    def filter_intermediate_steps(self,
                                  intermediate_steps: list[IntermediateStep],
                                  event_filter: list[IntermediateStepType] | None = None) -> list[dict]:
        """
        Filter out the intermediate steps that are not relevant for evaluation.
        The output is written with the intention of re-running the evaluation using the original config file.
        """
        if event_filter is None:
            event_filter = self.intermediate_step_adapter.DEFAULT_EVENT_FILTER
        filtered_steps = self.intermediate_step_adapter.filter_intermediate_steps(intermediate_steps, event_filter)
        return self.intermediate_step_adapter.serialize_intermediate_steps(filtered_steps)

    def publish_eval_input(self,
                           eval_input,
                           workflow_output_step_filter: list[IntermediateStepType] | None = None) -> str:
        """
        Convert the EvalInput object to a JSON output for storing in a file. Use the original keys to
        allow re-running evaluation using the original config file and '--skip_workflow' option.
        """

        def parse_if_json_string(value):
            if isinstance(value, str):
                try:
                    return json.loads(value)
                except json.JSONDecodeError:
                    return value
            if hasattr(value, "model_dump"):
                return value.model_dump()
            return value

        indent = 2
        if self.is_structured_input():
            # Extract structured data from EvalInputItems
            data = [{
                self.id_key: item.id,
                self.question_key: item.input_obj,
                self.answer_key: item.expected_output_obj,
                self.generated_answer_key: item.output_obj,
                self.trajectory_key: self.filter_intermediate_steps(item.trajectory, workflow_output_step_filter),
                self.expected_trajectory_key: self.filter_intermediate_steps(item.expected_trajectory),
            } for item in eval_input.eval_input_items]
        else:
            # Unstructured case: return only raw output objects as a JSON array
            data = [parse_if_json_string(item.output_obj) for item in eval_input.eval_input_items]

        return json.dumps(data, indent=indent, ensure_ascii=False, default=str)
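The repetition bookkeeping in `setup_reps` relies on `groupby(...).cumcount()` to number the copies of each id. A toy run of the same pattern, independent of the class:

    import pandas as pd

    df = pd.DataFrame({"id": ["a", "b"], "question": ["q1", "q2"]})
    reps = 3

    df = pd.concat([df] * reps, ignore_index=True)
    rep_index = df.groupby("id").cumcount().astype(str)  # 0, 1, 2 per id
    df["id"] = df["id"].astype(str) + "_rep" + rep_index

    print(df["id"].tolist())
    # ['a_rep0', 'b_rep0', 'a_rep1', 'b_rep1', 'a_rep2', 'b_rep2']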