nvidia_nat-1.1.0a20251020-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiq/__init__.py +66 -0
- nat/agent/__init__.py +0 -0
- nat/agent/base.py +265 -0
- nat/agent/dual_node.py +72 -0
- nat/agent/prompt_optimizer/__init__.py +0 -0
- nat/agent/prompt_optimizer/prompt.py +68 -0
- nat/agent/prompt_optimizer/register.py +149 -0
- nat/agent/react_agent/__init__.py +0 -0
- nat/agent/react_agent/agent.py +394 -0
- nat/agent/react_agent/output_parser.py +104 -0
- nat/agent/react_agent/prompt.py +44 -0
- nat/agent/react_agent/register.py +168 -0
- nat/agent/reasoning_agent/__init__.py +0 -0
- nat/agent/reasoning_agent/reasoning_agent.py +227 -0
- nat/agent/register.py +23 -0
- nat/agent/rewoo_agent/__init__.py +0 -0
- nat/agent/rewoo_agent/agent.py +593 -0
- nat/agent/rewoo_agent/prompt.py +107 -0
- nat/agent/rewoo_agent/register.py +175 -0
- nat/agent/tool_calling_agent/__init__.py +0 -0
- nat/agent/tool_calling_agent/agent.py +246 -0
- nat/agent/tool_calling_agent/register.py +129 -0
- nat/authentication/__init__.py +14 -0
- nat/authentication/api_key/__init__.py +14 -0
- nat/authentication/api_key/api_key_auth_provider.py +96 -0
- nat/authentication/api_key/api_key_auth_provider_config.py +124 -0
- nat/authentication/api_key/register.py +26 -0
- nat/authentication/credential_validator/__init__.py +14 -0
- nat/authentication/credential_validator/bearer_token_validator.py +557 -0
- nat/authentication/exceptions/__init__.py +14 -0
- nat/authentication/exceptions/api_key_exceptions.py +38 -0
- nat/authentication/http_basic_auth/__init__.py +0 -0
- nat/authentication/http_basic_auth/http_basic_auth_provider.py +81 -0
- nat/authentication/http_basic_auth/register.py +30 -0
- nat/authentication/interfaces.py +96 -0
- nat/authentication/oauth2/__init__.py +14 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +140 -0
- nat/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +39 -0
- nat/authentication/oauth2/oauth2_resource_server_config.py +124 -0
- nat/authentication/oauth2/register.py +25 -0
- nat/authentication/register.py +20 -0
- nat/builder/__init__.py +0 -0
- nat/builder/builder.py +317 -0
- nat/builder/component_utils.py +320 -0
- nat/builder/context.py +321 -0
- nat/builder/embedder.py +24 -0
- nat/builder/eval_builder.py +166 -0
- nat/builder/evaluator.py +29 -0
- nat/builder/framework_enum.py +25 -0
- nat/builder/front_end.py +73 -0
- nat/builder/function.py +714 -0
- nat/builder/function_base.py +380 -0
- nat/builder/function_info.py +625 -0
- nat/builder/intermediate_step_manager.py +206 -0
- nat/builder/llm.py +25 -0
- nat/builder/retriever.py +25 -0
- nat/builder/user_interaction_manager.py +78 -0
- nat/builder/workflow.py +160 -0
- nat/builder/workflow_builder.py +1365 -0
- nat/cli/__init__.py +14 -0
- nat/cli/cli_utils/__init__.py +0 -0
- nat/cli/cli_utils/config_override.py +231 -0
- nat/cli/cli_utils/validation.py +37 -0
- nat/cli/commands/__init__.py +0 -0
- nat/cli/commands/configure/__init__.py +0 -0
- nat/cli/commands/configure/channel/__init__.py +0 -0
- nat/cli/commands/configure/channel/add.py +28 -0
- nat/cli/commands/configure/channel/channel.py +34 -0
- nat/cli/commands/configure/channel/remove.py +30 -0
- nat/cli/commands/configure/channel/update.py +30 -0
- nat/cli/commands/configure/configure.py +33 -0
- nat/cli/commands/evaluate.py +139 -0
- nat/cli/commands/info/__init__.py +14 -0
- nat/cli/commands/info/info.py +47 -0
- nat/cli/commands/info/list_channels.py +32 -0
- nat/cli/commands/info/list_components.py +128 -0
- nat/cli/commands/mcp/__init__.py +14 -0
- nat/cli/commands/mcp/mcp.py +986 -0
- nat/cli/commands/object_store/__init__.py +14 -0
- nat/cli/commands/object_store/object_store.py +227 -0
- nat/cli/commands/optimize.py +90 -0
- nat/cli/commands/registry/__init__.py +14 -0
- nat/cli/commands/registry/publish.py +88 -0
- nat/cli/commands/registry/pull.py +118 -0
- nat/cli/commands/registry/registry.py +36 -0
- nat/cli/commands/registry/remove.py +108 -0
- nat/cli/commands/registry/search.py +153 -0
- nat/cli/commands/sizing/__init__.py +14 -0
- nat/cli/commands/sizing/calc.py +297 -0
- nat/cli/commands/sizing/sizing.py +27 -0
- nat/cli/commands/start.py +257 -0
- nat/cli/commands/uninstall.py +81 -0
- nat/cli/commands/validate.py +47 -0
- nat/cli/commands/workflow/__init__.py +14 -0
- nat/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- nat/cli/commands/workflow/templates/config.yml.j2 +17 -0
- nat/cli/commands/workflow/templates/pyproject.toml.j2 +25 -0
- nat/cli/commands/workflow/templates/register.py.j2 +4 -0
- nat/cli/commands/workflow/templates/workflow.py.j2 +50 -0
- nat/cli/commands/workflow/workflow.py +37 -0
- nat/cli/commands/workflow/workflow_commands.py +403 -0
- nat/cli/entrypoint.py +141 -0
- nat/cli/main.py +60 -0
- nat/cli/register_workflow.py +522 -0
- nat/cli/type_registry.py +1069 -0
- nat/control_flow/__init__.py +0 -0
- nat/control_flow/register.py +20 -0
- nat/control_flow/router_agent/__init__.py +0 -0
- nat/control_flow/router_agent/agent.py +329 -0
- nat/control_flow/router_agent/prompt.py +48 -0
- nat/control_flow/router_agent/register.py +91 -0
- nat/control_flow/sequential_executor.py +166 -0
- nat/data_models/__init__.py +14 -0
- nat/data_models/agent.py +34 -0
- nat/data_models/api_server.py +843 -0
- nat/data_models/authentication.py +245 -0
- nat/data_models/common.py +171 -0
- nat/data_models/component.py +60 -0
- nat/data_models/component_ref.py +179 -0
- nat/data_models/config.py +434 -0
- nat/data_models/dataset_handler.py +169 -0
- nat/data_models/discovery_metadata.py +305 -0
- nat/data_models/embedder.py +27 -0
- nat/data_models/evaluate.py +130 -0
- nat/data_models/evaluator.py +26 -0
- nat/data_models/front_end.py +26 -0
- nat/data_models/function.py +64 -0
- nat/data_models/function_dependencies.py +80 -0
- nat/data_models/gated_field_mixin.py +242 -0
- nat/data_models/interactive.py +246 -0
- nat/data_models/intermediate_step.py +302 -0
- nat/data_models/invocation_node.py +38 -0
- nat/data_models/llm.py +27 -0
- nat/data_models/logging.py +26 -0
- nat/data_models/memory.py +27 -0
- nat/data_models/object_store.py +44 -0
- nat/data_models/optimizable.py +119 -0
- nat/data_models/optimizer.py +149 -0
- nat/data_models/profiler.py +54 -0
- nat/data_models/registry_handler.py +26 -0
- nat/data_models/retriever.py +30 -0
- nat/data_models/retry_mixin.py +35 -0
- nat/data_models/span.py +228 -0
- nat/data_models/step_adaptor.py +64 -0
- nat/data_models/streaming.py +33 -0
- nat/data_models/swe_bench_model.py +54 -0
- nat/data_models/telemetry_exporter.py +26 -0
- nat/data_models/temperature_mixin.py +44 -0
- nat/data_models/thinking_mixin.py +86 -0
- nat/data_models/top_p_mixin.py +44 -0
- nat/data_models/ttc_strategy.py +30 -0
- nat/embedder/__init__.py +0 -0
- nat/embedder/azure_openai_embedder.py +46 -0
- nat/embedder/nim_embedder.py +59 -0
- nat/embedder/openai_embedder.py +42 -0
- nat/embedder/register.py +22 -0
- nat/eval/__init__.py +14 -0
- nat/eval/config.py +62 -0
- nat/eval/dataset_handler/__init__.py +0 -0
- nat/eval/dataset_handler/dataset_downloader.py +106 -0
- nat/eval/dataset_handler/dataset_filter.py +52 -0
- nat/eval/dataset_handler/dataset_handler.py +431 -0
- nat/eval/evaluate.py +565 -0
- nat/eval/evaluator/__init__.py +14 -0
- nat/eval/evaluator/base_evaluator.py +77 -0
- nat/eval/evaluator/evaluator_model.py +58 -0
- nat/eval/intermediate_step_adapter.py +99 -0
- nat/eval/rag_evaluator/__init__.py +0 -0
- nat/eval/rag_evaluator/evaluate.py +178 -0
- nat/eval/rag_evaluator/register.py +143 -0
- nat/eval/register.py +26 -0
- nat/eval/remote_workflow.py +133 -0
- nat/eval/runners/__init__.py +14 -0
- nat/eval/runners/config.py +39 -0
- nat/eval/runners/multi_eval_runner.py +54 -0
- nat/eval/runtime_evaluator/__init__.py +14 -0
- nat/eval/runtime_evaluator/evaluate.py +123 -0
- nat/eval/runtime_evaluator/register.py +100 -0
- nat/eval/runtime_event_subscriber.py +52 -0
- nat/eval/swe_bench_evaluator/__init__.py +0 -0
- nat/eval/swe_bench_evaluator/evaluate.py +215 -0
- nat/eval/swe_bench_evaluator/register.py +36 -0
- nat/eval/trajectory_evaluator/__init__.py +0 -0
- nat/eval/trajectory_evaluator/evaluate.py +75 -0
- nat/eval/trajectory_evaluator/register.py +40 -0
- nat/eval/tunable_rag_evaluator/__init__.py +0 -0
- nat/eval/tunable_rag_evaluator/evaluate.py +242 -0
- nat/eval/tunable_rag_evaluator/register.py +52 -0
- nat/eval/usage_stats.py +41 -0
- nat/eval/utils/__init__.py +0 -0
- nat/eval/utils/eval_trace_ctx.py +89 -0
- nat/eval/utils/output_uploader.py +140 -0
- nat/eval/utils/tqdm_position_registry.py +40 -0
- nat/eval/utils/weave_eval.py +193 -0
- nat/experimental/__init__.py +0 -0
- nat/experimental/decorators/__init__.py +0 -0
- nat/experimental/decorators/experimental_warning_decorator.py +154 -0
- nat/experimental/test_time_compute/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/__init__.py +0 -0
- nat/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +147 -0
- nat/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +204 -0
- nat/experimental/test_time_compute/editing/motivation_aware_summarization.py +107 -0
- nat/experimental/test_time_compute/functions/__init__.py +0 -0
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +105 -0
- nat/experimental/test_time_compute/functions/plan_select_execute_function.py +228 -0
- nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +205 -0
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +146 -0
- nat/experimental/test_time_compute/models/__init__.py +0 -0
- nat/experimental/test_time_compute/models/editor_config.py +132 -0
- nat/experimental/test_time_compute/models/scoring_config.py +112 -0
- nat/experimental/test_time_compute/models/search_config.py +120 -0
- nat/experimental/test_time_compute/models/selection_config.py +154 -0
- nat/experimental/test_time_compute/models/stage_enums.py +43 -0
- nat/experimental/test_time_compute/models/strategy_base.py +67 -0
- nat/experimental/test_time_compute/models/tool_use_config.py +41 -0
- nat/experimental/test_time_compute/models/ttc_item.py +48 -0
- nat/experimental/test_time_compute/register.py +35 -0
- nat/experimental/test_time_compute/scoring/__init__.py +0 -0
- nat/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +168 -0
- nat/experimental/test_time_compute/scoring/motivation_aware_scorer.py +111 -0
- nat/experimental/test_time_compute/search/__init__.py +0 -0
- nat/experimental/test_time_compute/search/multi_llm_planner.py +128 -0
- nat/experimental/test_time_compute/search/multi_query_retrieval_search.py +122 -0
- nat/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +128 -0
- nat/experimental/test_time_compute/selection/__init__.py +0 -0
- nat/experimental/test_time_compute/selection/best_of_n_selector.py +63 -0
- nat/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +131 -0
- nat/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +157 -0
- nat/experimental/test_time_compute/selection/llm_based_plan_selector.py +128 -0
- nat/experimental/test_time_compute/selection/threshold_selector.py +58 -0
- nat/front_ends/__init__.py +14 -0
- nat/front_ends/console/__init__.py +14 -0
- nat/front_ends/console/authentication_flow_handler.py +285 -0
- nat/front_ends/console/console_front_end_config.py +32 -0
- nat/front_ends/console/console_front_end_plugin.py +108 -0
- nat/front_ends/console/register.py +25 -0
- nat/front_ends/cron/__init__.py +14 -0
- nat/front_ends/fastapi/__init__.py +14 -0
- nat/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- nat/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +27 -0
- nat/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +142 -0
- nat/front_ends/fastapi/dask_client_mixin.py +65 -0
- nat/front_ends/fastapi/fastapi_front_end_config.py +272 -0
- nat/front_ends/fastapi/fastapi_front_end_controller.py +68 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin.py +247 -0
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +1257 -0
- nat/front_ends/fastapi/html_snippets/__init__.py +14 -0
- nat/front_ends/fastapi/html_snippets/auth_code_grant_success.py +35 -0
- nat/front_ends/fastapi/intermediate_steps_subscriber.py +80 -0
- nat/front_ends/fastapi/job_store.py +602 -0
- nat/front_ends/fastapi/main.py +64 -0
- nat/front_ends/fastapi/message_handler.py +344 -0
- nat/front_ends/fastapi/message_validator.py +351 -0
- nat/front_ends/fastapi/register.py +25 -0
- nat/front_ends/fastapi/response_helpers.py +195 -0
- nat/front_ends/fastapi/step_adaptor.py +319 -0
- nat/front_ends/fastapi/utils.py +57 -0
- nat/front_ends/mcp/__init__.py +14 -0
- nat/front_ends/mcp/introspection_token_verifier.py +73 -0
- nat/front_ends/mcp/mcp_front_end_config.py +90 -0
- nat/front_ends/mcp/mcp_front_end_plugin.py +113 -0
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +268 -0
- nat/front_ends/mcp/memory_profiler.py +320 -0
- nat/front_ends/mcp/register.py +27 -0
- nat/front_ends/mcp/tool_converter.py +290 -0
- nat/front_ends/register.py +21 -0
- nat/front_ends/simple_base/__init__.py +14 -0
- nat/front_ends/simple_base/simple_front_end_plugin_base.py +56 -0
- nat/llm/__init__.py +0 -0
- nat/llm/aws_bedrock_llm.py +69 -0
- nat/llm/azure_openai_llm.py +57 -0
- nat/llm/litellm_llm.py +69 -0
- nat/llm/nim_llm.py +58 -0
- nat/llm/openai_llm.py +54 -0
- nat/llm/register.py +27 -0
- nat/llm/utils/__init__.py +14 -0
- nat/llm/utils/env_config_value.py +93 -0
- nat/llm/utils/error.py +17 -0
- nat/llm/utils/thinking.py +215 -0
- nat/memory/__init__.py +20 -0
- nat/memory/interfaces.py +183 -0
- nat/memory/models.py +112 -0
- nat/meta/pypi.md +58 -0
- nat/object_store/__init__.py +20 -0
- nat/object_store/in_memory_object_store.py +76 -0
- nat/object_store/interfaces.py +84 -0
- nat/object_store/models.py +38 -0
- nat/object_store/register.py +19 -0
- nat/observability/__init__.py +14 -0
- nat/observability/exporter/__init__.py +14 -0
- nat/observability/exporter/base_exporter.py +449 -0
- nat/observability/exporter/exporter.py +78 -0
- nat/observability/exporter/file_exporter.py +33 -0
- nat/observability/exporter/processing_exporter.py +550 -0
- nat/observability/exporter/raw_exporter.py +52 -0
- nat/observability/exporter/span_exporter.py +308 -0
- nat/observability/exporter_manager.py +335 -0
- nat/observability/mixin/__init__.py +14 -0
- nat/observability/mixin/batch_config_mixin.py +26 -0
- nat/observability/mixin/collector_config_mixin.py +23 -0
- nat/observability/mixin/file_mixin.py +288 -0
- nat/observability/mixin/file_mode.py +23 -0
- nat/observability/mixin/redaction_config_mixin.py +42 -0
- nat/observability/mixin/resource_conflict_mixin.py +134 -0
- nat/observability/mixin/serialize_mixin.py +61 -0
- nat/observability/mixin/tagging_config_mixin.py +62 -0
- nat/observability/mixin/type_introspection_mixin.py +496 -0
- nat/observability/processor/__init__.py +14 -0
- nat/observability/processor/batching_processor.py +308 -0
- nat/observability/processor/callback_processor.py +42 -0
- nat/observability/processor/falsy_batch_filter_processor.py +55 -0
- nat/observability/processor/intermediate_step_serializer.py +28 -0
- nat/observability/processor/processor.py +74 -0
- nat/observability/processor/processor_factory.py +70 -0
- nat/observability/processor/redaction/__init__.py +24 -0
- nat/observability/processor/redaction/contextual_redaction_processor.py +125 -0
- nat/observability/processor/redaction/contextual_span_redaction_processor.py +66 -0
- nat/observability/processor/redaction/redaction_processor.py +177 -0
- nat/observability/processor/redaction/span_header_redaction_processor.py +92 -0
- nat/observability/processor/span_tagging_processor.py +68 -0
- nat/observability/register.py +114 -0
- nat/observability/utils/__init__.py +14 -0
- nat/observability/utils/dict_utils.py +236 -0
- nat/observability/utils/time_utils.py +31 -0
- nat/plugins/.namespace +1 -0
- nat/profiler/__init__.py +0 -0
- nat/profiler/calc/__init__.py +14 -0
- nat/profiler/calc/calc_runner.py +626 -0
- nat/profiler/calc/calculations.py +288 -0
- nat/profiler/calc/data_models.py +188 -0
- nat/profiler/calc/plot.py +345 -0
- nat/profiler/callbacks/__init__.py +0 -0
- nat/profiler/callbacks/agno_callback_handler.py +295 -0
- nat/profiler/callbacks/base_callback_class.py +20 -0
- nat/profiler/callbacks/langchain_callback_handler.py +297 -0
- nat/profiler/callbacks/llama_index_callback_handler.py +205 -0
- nat/profiler/callbacks/semantic_kernel_callback_handler.py +238 -0
- nat/profiler/callbacks/token_usage_base_model.py +27 -0
- nat/profiler/data_frame_row.py +51 -0
- nat/profiler/data_models.py +24 -0
- nat/profiler/decorators/__init__.py +0 -0
- nat/profiler/decorators/framework_wrapper.py +180 -0
- nat/profiler/decorators/function_tracking.py +411 -0
- nat/profiler/forecasting/__init__.py +0 -0
- nat/profiler/forecasting/config.py +18 -0
- nat/profiler/forecasting/model_trainer.py +75 -0
- nat/profiler/forecasting/models/__init__.py +22 -0
- nat/profiler/forecasting/models/forecasting_base_model.py +42 -0
- nat/profiler/forecasting/models/linear_model.py +197 -0
- nat/profiler/forecasting/models/random_forest_regressor.py +269 -0
- nat/profiler/inference_metrics_model.py +28 -0
- nat/profiler/inference_optimization/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +460 -0
- nat/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +258 -0
- nat/profiler/inference_optimization/data_models.py +386 -0
- nat/profiler/inference_optimization/experimental/__init__.py +0 -0
- nat/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +468 -0
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +404 -0
- nat/profiler/inference_optimization/llm_metrics.py +212 -0
- nat/profiler/inference_optimization/prompt_caching.py +163 -0
- nat/profiler/inference_optimization/token_uniqueness.py +107 -0
- nat/profiler/inference_optimization/workflow_runtimes.py +72 -0
- nat/profiler/intermediate_property_adapter.py +102 -0
- nat/profiler/parameter_optimization/__init__.py +0 -0
- nat/profiler/parameter_optimization/optimizable_utils.py +93 -0
- nat/profiler/parameter_optimization/optimizer_runtime.py +67 -0
- nat/profiler/parameter_optimization/parameter_optimizer.py +153 -0
- nat/profiler/parameter_optimization/parameter_selection.py +107 -0
- nat/profiler/parameter_optimization/pareto_visualizer.py +380 -0
- nat/profiler/parameter_optimization/prompt_optimizer.py +384 -0
- nat/profiler/parameter_optimization/update_helpers.py +66 -0
- nat/profiler/profile_runner.py +478 -0
- nat/profiler/utils.py +186 -0
- nat/registry_handlers/__init__.py +0 -0
- nat/registry_handlers/local/__init__.py +0 -0
- nat/registry_handlers/local/local_handler.py +176 -0
- nat/registry_handlers/local/register_local.py +37 -0
- nat/registry_handlers/metadata_factory.py +60 -0
- nat/registry_handlers/package_utils.py +570 -0
- nat/registry_handlers/pypi/__init__.py +0 -0
- nat/registry_handlers/pypi/pypi_handler.py +248 -0
- nat/registry_handlers/pypi/register_pypi.py +40 -0
- nat/registry_handlers/register.py +20 -0
- nat/registry_handlers/registry_handler_base.py +157 -0
- nat/registry_handlers/rest/__init__.py +0 -0
- nat/registry_handlers/rest/register_rest.py +56 -0
- nat/registry_handlers/rest/rest_handler.py +236 -0
- nat/registry_handlers/schemas/__init__.py +0 -0
- nat/registry_handlers/schemas/headers.py +42 -0
- nat/registry_handlers/schemas/package.py +68 -0
- nat/registry_handlers/schemas/publish.py +68 -0
- nat/registry_handlers/schemas/pull.py +82 -0
- nat/registry_handlers/schemas/remove.py +36 -0
- nat/registry_handlers/schemas/search.py +91 -0
- nat/registry_handlers/schemas/status.py +47 -0
- nat/retriever/__init__.py +0 -0
- nat/retriever/interface.py +41 -0
- nat/retriever/milvus/__init__.py +14 -0
- nat/retriever/milvus/register.py +81 -0
- nat/retriever/milvus/retriever.py +228 -0
- nat/retriever/models.py +77 -0
- nat/retriever/nemo_retriever/__init__.py +14 -0
- nat/retriever/nemo_retriever/register.py +60 -0
- nat/retriever/nemo_retriever/retriever.py +190 -0
- nat/retriever/register.py +21 -0
- nat/runtime/__init__.py +14 -0
- nat/runtime/loader.py +220 -0
- nat/runtime/runner.py +292 -0
- nat/runtime/session.py +223 -0
- nat/runtime/user_metadata.py +130 -0
- nat/settings/__init__.py +0 -0
- nat/settings/global_settings.py +329 -0
- nat/test/.namespace +1 -0
- nat/tool/__init__.py +0 -0
- nat/tool/chat_completion.py +77 -0
- nat/tool/code_execution/README.md +151 -0
- nat/tool/code_execution/__init__.py +0 -0
- nat/tool/code_execution/code_sandbox.py +267 -0
- nat/tool/code_execution/local_sandbox/.gitignore +1 -0
- nat/tool/code_execution/local_sandbox/Dockerfile.sandbox +60 -0
- nat/tool/code_execution/local_sandbox/__init__.py +13 -0
- nat/tool/code_execution/local_sandbox/local_sandbox_server.py +198 -0
- nat/tool/code_execution/local_sandbox/sandbox.requirements.txt +6 -0
- nat/tool/code_execution/local_sandbox/start_local_sandbox.sh +50 -0
- nat/tool/code_execution/register.py +74 -0
- nat/tool/code_execution/test_code_execution_sandbox.py +414 -0
- nat/tool/code_execution/utils.py +100 -0
- nat/tool/datetime_tools.py +82 -0
- nat/tool/document_search.py +141 -0
- nat/tool/github_tools.py +450 -0
- nat/tool/memory_tools/__init__.py +0 -0
- nat/tool/memory_tools/add_memory_tool.py +79 -0
- nat/tool/memory_tools/delete_memory_tool.py +66 -0
- nat/tool/memory_tools/get_memory_tool.py +72 -0
- nat/tool/nvidia_rag.py +95 -0
- nat/tool/register.py +31 -0
- nat/tool/retriever.py +95 -0
- nat/tool/server_tools.py +66 -0
- nat/utils/__init__.py +0 -0
- nat/utils/callable_utils.py +70 -0
- nat/utils/data_models/__init__.py +0 -0
- nat/utils/data_models/schema_validator.py +58 -0
- nat/utils/debugging_utils.py +43 -0
- nat/utils/decorators.py +210 -0
- nat/utils/dump_distro_mapping.py +32 -0
- nat/utils/exception_handlers/__init__.py +0 -0
- nat/utils/exception_handlers/automatic_retries.py +342 -0
- nat/utils/exception_handlers/schemas.py +114 -0
- nat/utils/io/__init__.py +0 -0
- nat/utils/io/model_processing.py +28 -0
- nat/utils/io/yaml_tools.py +119 -0
- nat/utils/log_levels.py +25 -0
- nat/utils/log_utils.py +37 -0
- nat/utils/metadata_utils.py +74 -0
- nat/utils/optional_imports.py +142 -0
- nat/utils/producer_consumer_queue.py +178 -0
- nat/utils/reactive/__init__.py +0 -0
- nat/utils/reactive/base/__init__.py +0 -0
- nat/utils/reactive/base/observable_base.py +65 -0
- nat/utils/reactive/base/observer_base.py +55 -0
- nat/utils/reactive/base/subject_base.py +79 -0
- nat/utils/reactive/observable.py +59 -0
- nat/utils/reactive/observer.py +76 -0
- nat/utils/reactive/subject.py +131 -0
- nat/utils/reactive/subscription.py +49 -0
- nat/utils/settings/__init__.py +0 -0
- nat/utils/settings/global_settings.py +195 -0
- nat/utils/string_utils.py +38 -0
- nat/utils/type_converter.py +299 -0
- nat/utils/type_utils.py +488 -0
- nat/utils/url_utils.py +27 -0
- nvidia_nat-1.1.0a20251020.dist-info/METADATA +195 -0
- nvidia_nat-1.1.0a20251020.dist-info/RECORD +480 -0
- nvidia_nat-1.1.0a20251020.dist-info/WHEEL +5 -0
- nvidia_nat-1.1.0a20251020.dist-info/entry_points.txt +22 -0
- nvidia_nat-1.1.0a20251020.dist-info/licenses/LICENSE-3rd-party.txt +5478 -0
- nvidia_nat-1.1.0a20251020.dist-info/licenses/LICENSE.md +201 -0
- nvidia_nat-1.1.0a20251020.dist-info/top_level.txt +2 -0

nat/eval/runners/config.py
@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing

from pydantic import BaseModel

from nat.eval.config import EvaluationRunConfig
from nat.eval.config import EvaluationRunOutput


class MultiEvaluationRunConfig(BaseModel):
    """
    Parameters used for a multi-evaluation run.
    This includes a dict of configs. The key is an id of any type.
    Each pass loads the config, applies the overrides, and runs to completion
    before the next pass starts.
    """
    configs: dict[typing.Any, EvaluationRunConfig]


class MultiEvaluationRunOutput(BaseModel):
    """
    Output of a multi-evaluation run.
    The results per-pass are accumulated in the evaluation_run_outputs dict.
    """
    evaluation_run_outputs: dict[typing.Any, EvaluationRunOutput]

nat/eval/runners/multi_eval_runner.py
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import typing

from nat.eval.config import EvaluationRunConfig
from nat.eval.config import EvaluationRunOutput
from nat.eval.evaluate import EvaluationRun
from nat.eval.runners.config import MultiEvaluationRunConfig


class MultiEvaluationRunner:
    """
    Run a multi-evaluation run.
    """

    def __init__(self, config: MultiEvaluationRunConfig):
        """
        Initialize a multi-evaluation run.
        """
        self.config = config
        self.evaluation_run_outputs: dict[typing.Any, EvaluationRunOutput] = {}

    async def run_all(self):
        """
        Run all evaluations defined by the overrides.
        """
        for id, config in self.config.configs.items():
            output = await self.run_single_evaluation(id, config)
            self.evaluation_run_outputs[id] = output

        return self.evaluation_run_outputs

    async def run_single_evaluation(self, id: typing.Any, config: EvaluationRunConfig) -> EvaluationRunOutput:
        """
        Run a single evaluation and return the output.
        """
        # copy the config in case the caller is using the same config for multiple evaluations
        config_copy = copy.deepcopy(config)
        evaluation_run = EvaluationRun(config_copy)
        return await evaluation_run.run_and_evaluate()
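
Taken together, these two files form a small sequential driver: build a MultiEvaluationRunConfig keyed by arbitrary ids, then await MultiEvaluationRunner(...).run_all(), which runs each pass to completion and returns the outputs keyed by the same ids. A minimal usage sketch follows; the EvaluationRunConfig field shown (config_file) is an illustrative assumption and is not defined in this diff.

import asyncio

from nat.eval.config import EvaluationRunConfig
from nat.eval.runners.config import MultiEvaluationRunConfig
from nat.eval.runners.multi_eval_runner import MultiEvaluationRunner


async def main() -> None:
    # Two sequential passes; keys may be any id (strings here).
    multi_config = MultiEvaluationRunConfig(
        configs={
            "baseline": EvaluationRunConfig(config_file="configs/eval.yml"),  # field name assumed
            "tuned": EvaluationRunConfig(config_file="configs/eval_tuned.yml"),  # field name assumed
        })

    outputs = await MultiEvaluationRunner(multi_config).run_all()
    for run_id, run_output in outputs.items():
        print(run_id, run_output)


asyncio.run(main())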

nat/eval/runtime_evaluator/__init__.py
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

nat/eval/runtime_evaluator/evaluate.py
@@ -0,0 +1,123 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from collections import defaultdict
from dataclasses import dataclass

from nat.data_models.intermediate_step import IntermediateStepType
from nat.eval.evaluator.base_evaluator import BaseEvaluator
from nat.eval.evaluator.evaluator_model import EvalInputItem
from nat.eval.evaluator.evaluator_model import EvalOutputItem
from nat.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor


@dataclass
class _CallTiming:
    start_ts: float | None = None
    end_ts: float | None = None

    @property
    def latency(self) -> float | None:
        if self.start_ts is None or self.end_ts is None:
            return None
        return max(0.0, self.end_ts - self.start_ts)


class AverageLLMLatencyEvaluator(BaseEvaluator):
    """
    Mean difference between connected LLM_START and LLM_END events (same UUID).
    The score is the average latency in seconds for the item. Reasoning contains per-call latencies.
    """

    def __init__(self, max_concurrency: int = 8):
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg LLM Latency")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
        calls: dict[str, _CallTiming] = defaultdict(_CallTiming)

        for step in (IntermediatePropertyAdaptor.from_intermediate_step(s) for s in item.trajectory):
            if step.event_type == IntermediateStepType.LLM_START:
                calls[step.UUID].start_ts = step.event_timestamp
            elif step.event_type == IntermediateStepType.LLM_END:
                calls[step.UUID].end_ts = step.event_timestamp

        latencies = [ct.latency for ct in calls.values() if ct.latency is not None]
        avg_latency = sum(latencies) / len(latencies) if latencies else 0.0

        reasoning = {
            "num_llm_calls": len(latencies),
            "latencies": latencies,
        }
        return EvalOutputItem(id=item.id, score=round(avg_latency, 4), reasoning=reasoning)


class AverageWorkflowRuntimeEvaluator(BaseEvaluator):
    """
    Average workflow runtime per item: max(event_timestamp) - min(event_timestamp) across the trajectory.
    The score is the runtime in seconds for the item.
    """

    def __init__(self, max_concurrency: int = 8):
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg Workflow Runtime")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
        if not item.trajectory:
            return EvalOutputItem(id=item.id, score=0.0, reasoning={"note": "no steps"})

        timestamps = [s.event_timestamp for s in item.trajectory]
        runtime = max(timestamps) - min(timestamps)
        return EvalOutputItem(id=item.id, score=round(max(0.0, runtime), 4), reasoning={"steps": len(timestamps)})


class AverageNumberOfLLMCallsEvaluator(BaseEvaluator):
    """
    Average number of LLM calls per item. The score is the count for the item.
    """

    def __init__(self, max_concurrency: int = 8):
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg # LLM Calls")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
        num_calls = sum(1 for s in item.trajectory if s.event_type == IntermediateStepType.LLM_END)
        return EvalOutputItem(id=item.id, score=float(num_calls), reasoning={"num_llm_end": num_calls})


class AverageTokensPerLLMEndEvaluator(BaseEvaluator):
    """
    Average total tokens per LLM_END event: sum of prompt and completion tokens if available.
    The score is the average tokens per LLM_END for the item (0 if none).
    """

    def __init__(self, max_concurrency: int = 8):
        super().__init__(max_concurrency=max_concurrency, tqdm_desc="Evaluating Avg Tokens/LLM_END")

    async def evaluate_item(self, item: EvalInputItem) -> EvalOutputItem:  # noqa: D401
        totals: list[int] = []
        for step in (IntermediatePropertyAdaptor.from_intermediate_step(s) for s in item.trajectory):
            if step.event_type == IntermediateStepType.LLM_END:
                total_tokens = step.token_usage.total_tokens
                # If framework doesn't set total, compute from prompt+completion
                if total_tokens == 0:
                    total_tokens = step.token_usage.prompt_tokens + step.token_usage.completion_tokens
                totals.append(total_tokens)

        avg_tokens = (sum(totals) / len(totals)) if totals else 0.0
        reasoning = {
            "num_llm_end": len(totals),
            "totals": totals,
        }
        return EvalOutputItem(id=item.id, score=round(avg_tokens, 2), reasoning=reasoning)
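
The latency evaluator's core trick is pairing LLM_START and LLM_END events through their shared UUID before averaging. Below is a self-contained sketch of that pairing logic with synthetic events; plain tuples stand in for intermediate steps, and nothing here comes from the package itself.

from collections import defaultdict
from dataclasses import dataclass


@dataclass
class _Timing:
    start_ts: float | None = None
    end_ts: float | None = None


# (uuid, event_type, timestamp) triples standing in for a trajectory
events = [
    ("call-1", "LLM_START", 10.0),
    ("call-2", "LLM_START", 10.5),
    ("call-1", "LLM_END", 12.0),   # latency 2.0 s
    ("call-2", "LLM_END", 13.5),   # latency 3.0 s
]

calls: dict[str, _Timing] = defaultdict(_Timing)
for uuid, event_type, ts in events:
    if event_type == "LLM_START":
        calls[uuid].start_ts = ts
    elif event_type == "LLM_END":
        calls[uuid].end_ts = ts

# Only fully paired calls contribute, mirroring _CallTiming.latency
latencies = [t.end_ts - t.start_ts for t in calls.values()
             if t.start_ts is not None and t.end_ts is not None]
print(sum(latencies) / len(latencies))  # 2.5, the item's score before rounding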

nat/eval/runtime_evaluator/register.py
@@ -0,0 +1,100 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pydantic import Field

from nat.builder.builder import EvalBuilder
from nat.builder.evaluator import EvaluatorInfo
from nat.cli.register_workflow import register_evaluator
from nat.data_models.evaluator import EvaluatorBaseConfig
from nat.eval.evaluator.evaluator_model import EvalInput
from nat.eval.evaluator.evaluator_model import EvalOutput


class AverageLLMLatencyConfig(EvaluatorBaseConfig, name="avg_llm_latency"):
    """Mean difference between connected LLM_START and LLM_END events (same UUID)."""

    max_concurrency: int = Field(default=8, description="Max concurrency for evaluation.")


class AverageWorkflowRuntimeConfig(EvaluatorBaseConfig, name="avg_workflow_runtime"):
    """Average workflow runtime per item (max timestamp - min timestamp)."""

    max_concurrency: int = Field(default=8, description="Max concurrency for evaluation.")


class AverageNumberOfLLMCallsConfig(EvaluatorBaseConfig, name="avg_num_llm_calls"):
    """Average number of LLM calls per item (count of LLM_END)."""

    max_concurrency: int = Field(default=8, description="Max concurrency for evaluation.")


class AverageTokensPerLLMEndConfig(EvaluatorBaseConfig, name="avg_tokens_per_llm_end"):
    """Average total tokens per LLM_END event (prompt + completion if available)."""

    max_concurrency: int = Field(default=8, description="Max concurrency for evaluation.")


@register_evaluator(config_type=AverageLLMLatencyConfig)
async def register_avg_llm_latency_evaluator(config: AverageLLMLatencyConfig, builder: EvalBuilder):
    from .evaluate import AverageLLMLatencyEvaluator

    evaluator = AverageLLMLatencyEvaluator(max_concurrency=config.max_concurrency or builder.get_max_concurrency())

    async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
        return await evaluator.evaluate(eval_input)

    yield EvaluatorInfo(config=config,
                        evaluate_fn=evaluate_fn,
                        description="Average LLM latency (s) from LLM_START to LLM_END")


@register_evaluator(config_type=AverageWorkflowRuntimeConfig)
async def register_avg_workflow_runtime_evaluator(config: AverageWorkflowRuntimeConfig, builder: EvalBuilder):
    from .evaluate import AverageWorkflowRuntimeEvaluator

    evaluator = AverageWorkflowRuntimeEvaluator(max_concurrency=config.max_concurrency or builder.get_max_concurrency())

    async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
        return await evaluator.evaluate(eval_input)

    yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Average workflow runtime (s)")


@register_evaluator(config_type=AverageNumberOfLLMCallsConfig)
async def register_avg_num_llm_calls_evaluator(config: AverageNumberOfLLMCallsConfig, builder: EvalBuilder):
    from .evaluate import AverageNumberOfLLMCallsEvaluator

    evaluator = AverageNumberOfLLMCallsEvaluator(
        max_concurrency=config.max_concurrency or builder.get_max_concurrency())

    async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
        return await evaluator.evaluate(eval_input)

    yield EvaluatorInfo(config=config, evaluate_fn=evaluate_fn, description="Average number of LLM calls")


@register_evaluator(config_type=AverageTokensPerLLMEndConfig)
async def register_avg_tokens_per_llm_end_evaluator(config: AverageTokensPerLLMEndConfig, builder: EvalBuilder):
    from .evaluate import AverageTokensPerLLMEndEvaluator

    evaluator = AverageTokensPerLLMEndEvaluator(max_concurrency=config.max_concurrency or builder.get_max_concurrency())

    async def evaluate_fn(eval_input: EvalInput) -> EvalOutput:
        return await evaluator.evaluate(eval_input)

    yield EvaluatorInfo(config=config,
                        evaluate_fn=evaluate_fn,
                        description="Average total tokens per LLM_END (prompt + completion)")
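
Every register function above picks its concurrency with `config.max_concurrency or builder.get_max_concurrency()`. Since `or` tests truthiness and the field defaults to 8, the builder value only wins when the configured value is falsy, i.e. 0. A two-assert illustration of that idiom:

def pick_concurrency(configured: int, builder_default: int) -> int:
    # Mirrors the fallback used in the register functions above.
    return configured or builder_default


assert pick_concurrency(8, 32) == 8    # any non-zero value wins
assert pick_concurrency(0, 32) == 32   # only 0 falls back to the builder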

nat/eval/runtime_event_subscriber.py
@@ -0,0 +1,52 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import logging

from nat.builder.context import Context
from nat.data_models.intermediate_step import IntermediateStep

logger = logging.getLogger(__name__)


def pull_intermediate() -> asyncio.Future[list[dict]]:
    """
    Subscribes to the runner's event stream using callbacks.
    Intermediate steps are collected and, when complete, the future is set
    with the list of dumped intermediate steps.
    """
    future = asyncio.Future()
    intermediate_steps = []  # We'll store the dumped steps here.
    context = Context.get()

    def on_next_cb(item: IntermediateStep):
        # Append each new intermediate step (dumped to dict) to the list.
        intermediate_steps.append(item.model_dump())

    def on_error_cb(exc: Exception):
        logger.error("Hit on_error: %s", exc)
        if not future.done():
            future.set_exception(exc)

    def on_complete_cb():
        logger.debug("Completed reading intermediate steps")
        if not future.done():
            future.set_result(intermediate_steps)

    # Subscribe with our callbacks.
    context.intermediate_step_manager.subscribe(on_next=on_next_cb, on_error=on_error_cb, on_complete=on_complete_cb)

    return future
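
Because pull_intermediate returns a bare asyncio.Future rather than awaiting anything itself, the intended ordering is: subscribe first, run the workflow second, then await the future. A hedged sketch of that pattern follows; the runner object and its run method are assumed context, not part of this file.

from nat.eval.runtime_event_subscriber import pull_intermediate


async def run_and_collect(runner, question: str):
    # Subscribe before starting the workflow so no early steps are missed.
    steps_future = pull_intermediate()

    result = await runner.run(question)  # hypothetical runner created elsewhere

    # Resolves once the intermediate-step stream completes (or errors).
    steps: list[dict] = await steps_future
    return result, steps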

nat/eval/swe_bench_evaluator/__init__.py
File without changes

nat/eval/swe_bench_evaluator/evaluate.py
@@ -0,0 +1,215 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import os
import shutil
from pathlib import Path

from nat.data_models.swe_bench_model import SWEBenchInput
from nat.data_models.swe_bench_model import SWEBenchOutput
from nat.eval.evaluator.evaluator_model import EvalInput
from nat.eval.evaluator.evaluator_model import EvalOutput

try:
    import swebench.harness.run_evaluation as swebench_eval
    from swebench.harness.constants import MAP_REPO_VERSION_TO_SPECS
except ImportError as exc:
    raise ImportError("Please install swebench to use this evaluator") from exc

logger = logging.getLogger(__name__)


class SweBenchEvaluator:

    def __init__(self, run_id: str, max_workers: int, output_dir: Path):

        self.run_id = run_id
        self.max_workers = max_workers
        self.output_dir = output_dir

        # metadata
        self._unsupported_repos = []
        self._swe_bench_inputs = []
        self._swe_bench_outputs = []
        self._model_name_or_path = "no_llm"

    def get_model_name_from_output(self, workflow_output: list[dict]) -> str | None:
        """Fetch the `model_name_or_path` from the first entry in the list."""
        return workflow_output[0].get("model_name_or_path") if workflow_output else None

    @staticmethod
    def empty_report_dir(report_dir: Path):
        """Remove the current contents of the report directory."""
        os.makedirs(report_dir, exist_ok=True)

        # Iterate through all files in the directory and remove them
        for item in report_dir.iterdir():
            if item.is_file():  # Remove files only
                item.unlink()
            elif item.is_dir():  # Remove subdirectories and their contents
                shutil.rmtree(item)

    @staticmethod
    def move_report_and_logs(swe_bench_report_file: str, logs_dir: str, report_dir: Path):
        """Temporary function to move the report and logs to the output directory."""
        try:
            shutil.move(swe_bench_report_file, report_dir)
        except Exception as e:
            logger.exception("Error moving report file: %s", e)

        try:
            dest_logs_dir = os.path.join(report_dir, 'logs')
            shutil.move(logs_dir, dest_logs_dir)
        except Exception as e:
            logger.exception("Error moving logs directory: %s", e)

    def is_repo_supported(self, repo: str, version: str) -> bool:
        """Check if the repo is supported by swebench."""

        try:
            _ = MAP_REPO_VERSION_TO_SPECS[repo][str(version)]
        except KeyError:
            self._unsupported_repos.append((repo, version))  # record as an ordered pair
            return False
        return True

    def process_eval_input(self, eval_input: EvalInput) -> tuple[Path | None, Path | None]:
        """Converts EvalInput into lists of SWEBenchInput and SWEBenchOutput models and applies filtering."""
        # Convert input_obj and output_obj JSON strings to SWEBenchInput and SWEBenchOutput models
        swebench_inputs = []
        swebench_outputs = []

        for item in eval_input.eval_input_items:
            try:
                swebench_input = SWEBenchInput.model_validate_json(item.input_obj)  # Convert input JSON to model
                swebench_input.version = str(swebench_input.version)  # Convert version to string
                swebench_inputs.append(swebench_input)

                if item.output_obj:  # Convert output JSON to model if available
                    swebench_output = SWEBenchOutput.model_validate_json(item.output_obj)
                    swebench_outputs.append(swebench_output)
                    # this is a bit of a hack to match the swe_bench harness
                    self._model_name_or_path = swebench_output.model_name_or_path

            except Exception as e:
                logger.exception("Failed to parse EvalInputItem %s: %s", item.id, e)

        # Filter out repos/versions not supported by SWEBench
        supported_inputs = [
            swebench for swebench in swebench_inputs if self.is_repo_supported(swebench.repo, swebench.version)
        ]

        if not supported_inputs:
            logger.error("No supported instances; nothing to evaluate")
            return None, None

        if len(supported_inputs) < len(swebench_inputs):
            logger.warning("The following repos are not supported by SWEBench and were skipped:\n %s",
                           {s.repo
                            for s in swebench_inputs if s not in supported_inputs})

        # Write SWEBenchInput to file
        workflow_input_file = self.output_dir / "nat_workflow_input.json"
        workflow_input_file.parent.mkdir(parents=True, exist_ok=True)
        Path(workflow_input_file).write_text(json.dumps([swebench.model_dump() for swebench in supported_inputs],
                                                        indent=2),
                                             encoding="utf-8")
        logger.info("Workflow input written to %s", workflow_input_file)

        # Filter SWEBenchOutput to include only instance_ids present in SWEBenchInput
        valid_instance_ids = {swebench.instance_id for swebench in supported_inputs}
        filtered_outputs = [output for output in swebench_outputs if output.instance_id in valid_instance_ids]

        if not filtered_outputs:
            logger.error("No supported outputs; nothing to evaluate")
            return None, None

        # Write SWEBenchOutput to file
        workflow_output_file = self.output_dir / "nat_workflow_output.json"
        Path(workflow_output_file).write_text(json.dumps([output.model_dump() for output in filtered_outputs],
                                                         indent=2),
                                              encoding="utf-8")
        logger.info("Workflow output written to %s", workflow_output_file)

        self._swe_bench_inputs = supported_inputs
        self._swe_bench_outputs = filtered_outputs
        return workflow_input_file, workflow_output_file

    def build_eval_output(self):
        """Builds the EvalOutput items from the SWEBenchOutput models."""
        # WIP: Build a score based on eval run logs
        for swebench_output in self._swe_bench_outputs:
            yield {"id": swebench_output.instance_id, "score": "-", "reasoning": "-"}

    @staticmethod
    def compute_score(success_cnt: int, total_cnt: int) -> float:
        if total_cnt == 0:
            return 0.0
        score = success_cnt / total_cnt
        return min(max(score, 0.0), 1.0)

    async def evaluate(self, eval_input: EvalInput) -> EvalOutput:
        """Run the swebench evaluation and store the report in the output directory."""

        # Process the EvalInput
        workflow_input_file, workflow_output_file = self.process_eval_input(eval_input)
        if not workflow_input_file or not workflow_output_file:
            # nothing to evaluate
            return EvalOutput(average_score=0.0, eval_output_items=[])

        report_dir = self.output_dir / "swe_bench_reports"
        self.empty_report_dir(report_dir)

        logger.info("Starting swe_bench run %s", self.run_id)
        swebench_eval.main(dataset_name=str(workflow_input_file),
                           split="dev",
                           instance_ids=[],
                           predictions_path=str(workflow_output_file),
                           max_workers=self.max_workers,
                           force_rebuild=False,
                           cache_level="env",
                           clean=False,
                           open_file_limit=4096,
                           run_id=self.run_id,
                           timeout=1800,
                           namespace=None,
                           rewrite_reports=False,
                           modal=False,
                           instance_image_tag='latest',
                           report_dir=str(report_dir))
        logger.info("Completed swe_bench run %s", self.run_id)

        swe_bench_report_file = f"{self._model_name_or_path}.{self.run_id}.json"

        # There is a bug in swebench because of which report_dir is being ignored. Copy the report to the output dir
        self.move_report_and_logs(swe_bench_report_file=swe_bench_report_file, logs_dir="logs", report_dir=report_dir)
        logger.info("SWE_bench report and logs written to %s directory", report_dir)

        # Read the swe_bench report file
        report_file = report_dir / swe_bench_report_file
        # If the report file is not present, fall back to a zero score
        avg_score = 0.0
        if report_file.exists():
            with open(report_file, encoding="utf-8") as f:
                report = json.load(f)
            resolved_instances = report.get("resolved_instances", 0)
            total_instances = report.get("total_instances", 0)
            avg_score = self.compute_score(resolved_instances, total_instances)

        # Build the EvalOutput from self._swe_bench_outputs and avg_score
        eval_output_items = list(self.build_eval_output())
        return EvalOutput(average_score=avg_score, eval_output_items=eval_output_items)
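
compute_score is a plain resolved/total ratio, clamped to [0, 1] with an explicit guard for the empty run. A quick check of its edge cases (note that importing this module requires swebench to be installed, per the guarded import at the top of the file):

from nat.eval.swe_bench_evaluator.evaluate import SweBenchEvaluator

assert SweBenchEvaluator.compute_score(0, 0) == 0.0   # no instances: guarded, not a ZeroDivisionError
assert SweBenchEvaluator.compute_score(3, 4) == 0.75  # resolved_instances / total_instances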

nat/eval/swe_bench_evaluator/register.py
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pydantic import Field

from nat.builder.builder import EvalBuilder
from nat.builder.evaluator import EvaluatorInfo
from nat.cli.register_workflow import register_evaluator
from nat.data_models.evaluator import EvaluatorBaseConfig


class SweBenchEvaluatorConfig(EvaluatorBaseConfig, name="swe_bench"):
    """Code patch evaluation for SWE Bench problems."""

    run_id: str = Field(description="swe-bench test harness run identifier.")


@register_evaluator(config_type=SweBenchEvaluatorConfig)
async def register_swe_bench_evaluator(config: SweBenchEvaluatorConfig, builder: EvalBuilder):

    from .evaluate import SweBenchEvaluator
    _evaluator = SweBenchEvaluator(config.run_id, builder.get_max_concurrency(), builder.get_output_dir())

    yield EvaluatorInfo(config=config, evaluate_fn=_evaluator.evaluate, description="SWE Bench Evaluator")

nat/eval/trajectory_evaluator/__init__.py
File without changes