nvidia-nat 1.2.0rc5__py3-none-any.whl → 1.2.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aiq → nat}/agent/react_agent/agent.py +12 -12
- {aiq → nat}/agent/react_agent/register.py +20 -20
- {aiq → nat}/agent/reasoning_agent/reasoning_agent.py +14 -14
- {aiq → nat}/agent/rewoo_agent/agent.py +7 -7
- {aiq → nat}/agent/rewoo_agent/prompt.py +11 -12
- {aiq → nat}/agent/rewoo_agent/register.py +47 -49
- {aiq → nat}/agent/tool_calling_agent/agent.py +4 -4
- {aiq → nat}/agent/tool_calling_agent/register.py +8 -8
- {aiq → nat}/authentication/api_key/api_key_auth_provider.py +6 -6
- {aiq → nat}/authentication/api_key/api_key_auth_provider_config.py +5 -5
- {aiq → nat}/authentication/api_key/register.py +4 -4
- {aiq → nat}/authentication/http_basic_auth/http_basic_auth_provider.py +10 -10
- {aiq → nat}/authentication/http_basic_auth/register.py +4 -4
- {aiq → nat}/authentication/interfaces.py +6 -6
- {aiq → nat}/authentication/oauth2/oauth2_auth_code_flow_provider.py +11 -11
- {aiq → nat}/authentication/oauth2/oauth2_auth_code_flow_provider_config.py +1 -1
- {aiq → nat}/authentication/oauth2/register.py +4 -4
- {aiq → nat}/authentication/register.py +3 -3
- {aiq → nat}/builder/builder.py +30 -30
- {aiq → nat}/builder/component_utils.py +23 -23
- {aiq → nat}/builder/context.py +35 -29
- {aiq → nat}/builder/embedder.py +1 -1
- {aiq → nat}/builder/eval_builder.py +13 -13
- {aiq → nat}/builder/evaluator.py +3 -3
- {aiq → nat}/builder/front_end.py +11 -11
- {aiq → nat}/builder/function.py +8 -8
- {aiq → nat}/builder/function_base.py +6 -6
- {aiq → nat}/builder/function_info.py +3 -3
- {aiq → nat}/builder/intermediate_step_manager.py +13 -13
- {aiq → nat}/builder/llm.py +1 -1
- {aiq → nat}/builder/retriever.py +1 -1
- {aiq → nat}/builder/user_interaction_manager.py +14 -10
- {aiq → nat}/builder/workflow.py +25 -25
- {aiq → nat}/builder/workflow_builder.py +86 -86
- {aiq → nat}/cli/cli_utils/config_override.py +2 -2
- {aiq → nat}/cli/cli_utils/validation.py +4 -4
- {aiq → nat}/cli/commands/configure/channel/add.py +2 -2
- {aiq → nat}/cli/commands/configure/channel/channel.py +4 -6
- {aiq → nat}/cli/commands/configure/channel/remove.py +2 -2
- {aiq → nat}/cli/commands/configure/channel/update.py +2 -2
- {aiq → nat}/cli/commands/configure/configure.py +3 -3
- {aiq → nat}/cli/commands/evaluate.py +3 -3
- {aiq → nat}/cli/commands/info/info.py +5 -7
- {aiq → nat}/cli/commands/info/list_channels.py +1 -1
- {aiq → nat}/cli/commands/info/list_components.py +14 -14
- {aiq → nat}/cli/commands/info/list_mcp.py +106 -15
- {aiq → nat}/cli/commands/registry/publish.py +9 -9
- {aiq → nat}/cli/commands/registry/pull.py +10 -10
- {aiq → nat}/cli/commands/registry/registry.py +5 -7
- {aiq → nat}/cli/commands/registry/remove.py +8 -8
- {aiq → nat}/cli/commands/registry/search.py +15 -15
- {aiq → nat}/cli/commands/sizing/calc.py +3 -3
- {aiq → nat}/cli/commands/start.py +15 -15
- {aiq → nat}/cli/commands/uninstall.py +5 -5
- {aiq → nat}/cli/commands/validate.py +1 -1
- {aiq → nat}/cli/commands/workflow/templates/pyproject.toml.j2 +4 -4
- {aiq → nat}/cli/commands/workflow/templates/workflow.py.j2 +4 -4
- {aiq → nat}/cli/commands/workflow/workflow.py +3 -3
- {aiq → nat}/cli/commands/workflow/workflow_commands.py +15 -11
- {aiq → nat}/cli/entrypoint.py +6 -6
- {aiq → nat}/cli/main.py +15 -2
- {aiq → nat}/cli/register_workflow.py +70 -70
- {aiq → nat}/cli/type_registry.py +82 -82
- {aiq → nat}/data_models/api_server.py +121 -99
- {aiq → nat}/data_models/authentication.py +2 -2
- {aiq → nat}/data_models/component.py +5 -1
- {aiq → nat}/data_models/component_ref.py +12 -12
- {aiq → nat}/data_models/config.py +17 -13
- {aiq → nat}/data_models/dataset_handler.py +58 -12
- {aiq → nat}/data_models/discovery_metadata.py +36 -66
- {aiq → nat}/data_models/evaluate.py +9 -9
- {aiq → nat}/data_models/intermediate_step.py +7 -7
- {aiq → nat}/data_models/retriever.py +2 -2
- {aiq → nat}/data_models/span.py +10 -7
- {aiq → nat}/data_models/step_adaptor.py +1 -1
- {aiq → nat}/data_models/telemetry_exporter.py +2 -2
- {aiq → nat}/embedder/nim_embedder.py +5 -5
- {aiq → nat}/embedder/openai_embedder.py +5 -5
- {aiq/retriever → nat/embedder}/register.py +2 -2
- {aiq → nat}/eval/config.py +4 -4
- {aiq → nat}/eval/dataset_handler/dataset_downloader.py +1 -1
- {aiq → nat}/eval/dataset_handler/dataset_filter.py +1 -1
- {aiq → nat}/eval/dataset_handler/dataset_handler.py +127 -14
- {aiq → nat}/eval/evaluate.py +38 -34
- {aiq → nat}/eval/evaluator/base_evaluator.py +9 -5
- {aiq → nat}/eval/evaluator/evaluator_model.py +4 -4
- {aiq → nat}/eval/intermediate_step_adapter.py +2 -2
- {aiq → nat}/eval/rag_evaluator/evaluate.py +8 -8
- {aiq → nat}/eval/rag_evaluator/register.py +7 -7
- {aiq → nat}/eval/remote_workflow.py +8 -8
- {aiq → nat}/eval/runners/config.py +2 -2
- {aiq → nat}/eval/runners/multi_eval_runner.py +4 -4
- {aiq → nat}/eval/runtime_event_subscriber.py +3 -3
- {aiq → nat}/eval/swe_bench_evaluator/evaluate.py +6 -6
- {aiq → nat}/eval/swe_bench_evaluator/register.py +4 -4
- {aiq → nat}/eval/trajectory_evaluator/evaluate.py +5 -5
- {aiq → nat}/eval/trajectory_evaluator/register.py +5 -5
- {aiq → nat}/eval/tunable_rag_evaluator/evaluate.py +3 -3
- {aiq → nat}/eval/tunable_rag_evaluator/register.py +6 -6
- {aiq → nat}/eval/utils/output_uploader.py +1 -1
- {aiq → nat}/eval/utils/weave_eval.py +6 -6
- {aiq → nat}/experimental/decorators/experimental_warning_decorator.py +6 -2
- {aiq → nat}/experimental/test_time_compute/editing/iterative_plan_refinement_editor.py +10 -10
- {aiq → nat}/experimental/test_time_compute/editing/llm_as_a_judge_editor.py +10 -10
- {aiq → nat}/experimental/test_time_compute/editing/motivation_aware_summarization.py +10 -10
- {aiq → nat}/experimental/test_time_compute/functions/execute_score_select_function.py +10 -10
- {aiq → nat}/experimental/test_time_compute/functions/plan_select_execute_function.py +17 -17
- aiq/experimental/test_time_compute/functions/its_tool_orchestration_function.py → nat/experimental/test_time_compute/functions/ttc_tool_orchestration_function.py +12 -12
- aiq/experimental/test_time_compute/functions/its_tool_wrapper_function.py → nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +10 -10
- {aiq → nat}/experimental/test_time_compute/models/editor_config.py +2 -2
- {aiq → nat}/experimental/test_time_compute/models/scoring_config.py +2 -2
- {aiq → nat}/experimental/test_time_compute/models/search_config.py +2 -2
- {aiq → nat}/experimental/test_time_compute/models/selection_config.py +2 -2
- {aiq → nat}/experimental/test_time_compute/models/strategy_base.py +4 -4
- {aiq → nat}/experimental/test_time_compute/register.py +2 -2
- {aiq → nat}/experimental/test_time_compute/scoring/llm_based_agent_scorer.py +11 -11
- {aiq → nat}/experimental/test_time_compute/scoring/llm_based_plan_scorer.py +11 -11
- {aiq → nat}/experimental/test_time_compute/scoring/motivation_aware_scorer.py +10 -10
- {aiq → nat}/experimental/test_time_compute/search/multi_llm_planner.py +10 -10
- {aiq → nat}/experimental/test_time_compute/search/multi_query_retrieval_search.py +10 -10
- {aiq → nat}/experimental/test_time_compute/search/single_shot_multi_plan_planner.py +11 -11
- {aiq → nat}/experimental/test_time_compute/selection/best_of_n_selector.py +7 -7
- {aiq → nat}/experimental/test_time_compute/selection/llm_based_agent_output_selector.py +11 -11
- {aiq → nat}/experimental/test_time_compute/selection/llm_based_output_merging_selector.py +11 -11
- {aiq → nat}/experimental/test_time_compute/selection/llm_based_plan_selector.py +11 -11
- {aiq → nat}/experimental/test_time_compute/selection/threshold_selector.py +7 -7
- {aiq → nat}/front_ends/console/authentication_flow_handler.py +6 -6
- {aiq → nat}/front_ends/console/console_front_end_config.py +2 -2
- {aiq → nat}/front_ends/console/console_front_end_plugin.py +9 -9
- {aiq → nat}/front_ends/console/register.py +5 -5
- {aiq → nat}/front_ends/fastapi/auth_flow_handlers/http_flow_handler.py +4 -4
- {aiq → nat}/front_ends/fastapi/auth_flow_handlers/websocket_flow_handler.py +6 -6
- {aiq → nat}/front_ends/fastapi/fastapi_front_end_config.py +22 -15
- {aiq → nat}/front_ends/fastapi/fastapi_front_end_plugin.py +10 -10
- {aiq → nat}/front_ends/fastapi/fastapi_front_end_plugin_worker.py +110 -115
- {aiq → nat}/front_ends/fastapi/intermediate_steps_subscriber.py +10 -10
- {aiq → nat}/front_ends/fastapi/main.py +8 -8
- {aiq → nat}/front_ends/fastapi/message_handler.py +58 -36
- {aiq → nat}/front_ends/fastapi/message_validator.py +55 -48
- {aiq → nat}/front_ends/fastapi/register.py +5 -5
- {aiq → nat}/front_ends/fastapi/response_helpers.py +26 -26
- {aiq → nat}/front_ends/fastapi/step_adaptor.py +35 -37
- {aiq → nat}/front_ends/mcp/mcp_front_end_config.py +12 -8
- nat/front_ends/mcp/mcp_front_end_plugin.py +81 -0
- nat/front_ends/mcp/mcp_front_end_plugin_worker.py +143 -0
- {aiq → nat}/front_ends/mcp/register.py +5 -5
- {aiq → nat}/front_ends/mcp/tool_converter.py +20 -21
- {aiq → nat}/front_ends/simple_base/simple_front_end_plugin_base.py +6 -6
- {aiq → nat}/llm/aws_bedrock_llm.py +5 -5
- {aiq → nat}/llm/nim_llm.py +5 -5
- {aiq → nat}/llm/openai_llm.py +5 -5
- {aiq → nat}/memory/__init__.py +2 -2
- nat/meta/pypi.md +58 -0
- {aiq → nat}/object_store/__init__.py +2 -2
- {aiq → nat}/object_store/in_memory_object_store.py +6 -6
- {aiq → nat}/observability/exporter/base_exporter.py +9 -9
- {aiq → nat}/observability/exporter/exporter.py +1 -1
- {aiq → nat}/observability/exporter/file_exporter.py +6 -6
- {aiq → nat}/observability/exporter/processing_exporter.py +9 -9
- {aiq → nat}/observability/exporter/raw_exporter.py +4 -4
- {aiq → nat}/observability/exporter/span_exporter.py +57 -34
- {aiq → nat}/observability/exporter_manager.py +6 -6
- {aiq → nat}/observability/mixin/file_mixin.py +2 -2
- {aiq → nat}/observability/processor/batching_processor.py +1 -1
- {aiq → nat}/observability/processor/callback_processor.py +1 -1
- {aiq → nat}/observability/processor/intermediate_step_serializer.py +4 -4
- {aiq → nat}/observability/processor/processor.py +1 -1
- {aiq → nat}/observability/register.py +7 -7
- {aiq → nat}/profiler/calc/calc_runner.py +18 -18
- {aiq → nat}/profiler/calc/calculations.py +3 -3
- {aiq → nat}/profiler/calc/plot.py +2 -2
- {aiq → nat}/profiler/callbacks/agno_callback_handler.py +14 -14
- {aiq → nat}/profiler/callbacks/langchain_callback_handler.py +11 -11
- {aiq → nat}/profiler/callbacks/llama_index_callback_handler.py +12 -12
- {aiq → nat}/profiler/callbacks/semantic_kernel_callback_handler.py +11 -11
- {aiq → nat}/profiler/data_models.py +2 -2
- {aiq → nat}/profiler/decorators/framework_wrapper.py +6 -6
- {aiq → nat}/profiler/decorators/function_tracking.py +10 -10
- {aiq → nat}/profiler/forecasting/model_trainer.py +5 -5
- {aiq → nat}/profiler/forecasting/models/linear_model.py +5 -4
- {aiq → nat}/profiler/forecasting/models/random_forest_regressor.py +5 -4
- {aiq → nat}/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +7 -7
- {aiq → nat}/profiler/inference_optimization/bottleneck_analysis/simple_stack_analysis.py +4 -4
- {aiq → nat}/profiler/inference_optimization/experimental/concurrency_spike_analysis.py +6 -6
- {aiq → nat}/profiler/inference_optimization/experimental/prefix_span_analysis.py +6 -6
- {aiq → nat}/profiler/inference_optimization/llm_metrics.py +2 -2
- {aiq → nat}/profiler/inference_optimization/prompt_caching.py +5 -5
- {aiq → nat}/profiler/inference_optimization/token_uniqueness.py +4 -4
- {aiq → nat}/profiler/inference_optimization/workflow_runtimes.py +3 -3
- {aiq → nat}/profiler/intermediate_property_adapter.py +3 -3
- {aiq → nat}/profiler/profile_runner.py +17 -17
- {aiq → nat}/profiler/utils.py +4 -4
- {aiq → nat}/registry_handlers/local/local_handler.py +19 -19
- {aiq → nat}/registry_handlers/local/register_local.py +4 -4
- {aiq → nat}/registry_handlers/metadata_factory.py +7 -7
- {aiq → nat}/registry_handlers/package_utils.py +37 -33
- {aiq → nat}/registry_handlers/pypi/pypi_handler.py +21 -21
- {aiq → nat}/registry_handlers/pypi/register_pypi.py +6 -6
- {aiq → nat}/registry_handlers/registry_handler_base.py +21 -21
- {aiq → nat}/registry_handlers/rest/register_rest.py +7 -7
- {aiq → nat}/registry_handlers/rest/rest_handler.py +19 -19
- {aiq → nat}/registry_handlers/schemas/package.py +3 -3
- {aiq → nat}/registry_handlers/schemas/publish.py +17 -12
- {aiq → nat}/registry_handlers/schemas/pull.py +6 -6
- {aiq → nat}/registry_handlers/schemas/remove.py +2 -2
- {aiq → nat}/registry_handlers/schemas/search.py +11 -11
- {aiq → nat}/retriever/interface.py +6 -2
- {aiq → nat}/retriever/milvus/register.py +7 -7
- {aiq → nat}/retriever/milvus/retriever.py +8 -8
- {aiq → nat}/retriever/models.py +10 -7
- {aiq → nat}/retriever/nemo_retriever/register.py +6 -6
- {aiq → nat}/retriever/nemo_retriever/retriever.py +10 -10
- {aiq/embedder → nat/retriever}/register.py +2 -4
- {aiq → nat}/runtime/loader.py +38 -33
- {aiq → nat}/runtime/runner.py +30 -25
- {aiq → nat}/runtime/session.py +19 -15
- {aiq → nat}/runtime/user_metadata.py +1 -1
- {aiq → nat}/settings/global_settings.py +11 -11
- {aiq → nat}/tool/chat_completion.py +6 -6
- {aiq → nat}/tool/code_execution/README.md +2 -2
- {aiq → nat}/tool/code_execution/code_sandbox.py +1 -1
- {aiq → nat}/tool/code_execution/register.py +5 -5
- {aiq → nat}/tool/code_execution/test_code_execution_sandbox.py +1 -1
- {aiq → nat}/tool/datetime_tools.py +4 -4
- {aiq → nat}/tool/document_search.py +6 -6
- {aiq → nat}/tool/github_tools/create_github_commit.py +4 -4
- {aiq → nat}/tool/github_tools/create_github_issue.py +4 -4
- {aiq → nat}/tool/github_tools/create_github_pr.py +4 -4
- {aiq → nat}/tool/github_tools/get_github_file.py +4 -4
- {aiq → nat}/tool/github_tools/get_github_issue.py +4 -4
- {aiq → nat}/tool/github_tools/get_github_pr.py +4 -4
- {aiq → nat}/tool/github_tools/update_github_issue.py +4 -4
- {aiq → nat}/tool/mcp/exceptions.py +1 -1
- {aiq → nat}/tool/mcp/mcp_client.py +2 -2
- {aiq → nat}/tool/mcp/mcp_tool.py +7 -7
- {aiq → nat}/tool/memory_tools/add_memory_tool.py +6 -6
- {aiq → nat}/tool/memory_tools/delete_memory_tool.py +6 -6
- {aiq → nat}/tool/memory_tools/get_memory_tool.py +6 -6
- {aiq → nat}/tool/nvidia_rag.py +4 -4
- {aiq → nat}/tool/retriever.py +20 -15
- {aiq → nat}/tool/server_tools.py +16 -16
- {aiq → nat}/utils/dump_distro_mapping.py +2 -2
- {aiq → nat}/utils/exception_handlers/mcp.py +8 -8
- {aiq → nat}/utils/io/yaml_tools.py +1 -1
- {aiq → nat}/utils/metadata_utils.py +2 -2
- {aiq → nat}/utils/reactive/base/observable_base.py +2 -2
- {aiq → nat}/utils/reactive/base/subject_base.py +1 -1
- {aiq → nat}/utils/reactive/observable.py +5 -5
- {aiq → nat}/utils/reactive/observer.py +1 -1
- {aiq → nat}/utils/reactive/subject.py +4 -4
- {aiq → nat}/utils/reactive/subscription.py +1 -1
- {aiq → nat}/utils/settings/global_settings.py +4 -4
- {aiq → nat}/utils/type_converter.py +1 -1
- {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/METADATA +37 -37
- nvidia_nat-1.2.0rc7.dist-info/RECORD +434 -0
- nvidia_nat-1.2.0rc7.dist-info/entry_points.txt +21 -0
- nvidia_nat-1.2.0rc7.dist-info/top_level.txt +1 -0
- aiq/embedder/langchain_client.py +0 -41
- aiq/front_ends/mcp/mcp_front_end_plugin.py +0 -93
- aiq/meta/module_to_distro.json +0 -3
- aiq/meta/pypi.md +0 -58
- nvidia_nat-1.2.0rc5.dist-info/RECORD +0 -435
- nvidia_nat-1.2.0rc5.dist-info/entry_points.txt +0 -20
- nvidia_nat-1.2.0rc5.dist-info/top_level.txt +0 -1
- {aiq → nat}/agent/__init__.py +0 -0
- {aiq → nat}/agent/base.py +0 -0
- {aiq → nat}/agent/dual_node.py +0 -0
- {aiq → nat}/agent/react_agent/__init__.py +0 -0
- {aiq → nat}/agent/react_agent/output_parser.py +0 -0
- {aiq → nat}/agent/react_agent/prompt.py +0 -0
- {aiq → nat}/agent/reasoning_agent/__init__.py +0 -0
- {aiq → nat}/agent/register.py +0 -0
- {aiq → nat}/agent/rewoo_agent/__init__.py +0 -0
- {aiq → nat}/agent/tool_calling_agent/__init__.py +0 -0
- {aiq → nat}/authentication/__init__.py +0 -0
- {aiq → nat}/authentication/api_key/__init__.py +0 -0
- {aiq → nat}/authentication/exceptions/__init__.py +0 -0
- {aiq → nat}/authentication/exceptions/api_key_exceptions.py +0 -0
- {aiq → nat}/authentication/http_basic_auth/__init__.py +0 -0
- {aiq → nat}/authentication/oauth2/__init__.py +0 -0
- {aiq → nat}/builder/__init__.py +0 -0
- {aiq → nat}/builder/framework_enum.py +0 -0
- {aiq → nat}/cli/__init__.py +0 -0
- {aiq → nat}/cli/cli_utils/__init__.py +0 -0
- {aiq → nat}/cli/commands/__init__.py +0 -0
- {aiq → nat}/cli/commands/configure/__init__.py +0 -0
- {aiq → nat}/cli/commands/configure/channel/__init__.py +0 -0
- {aiq → nat}/cli/commands/info/__init__.py +0 -0
- {aiq → nat}/cli/commands/registry/__init__.py +0 -0
- {aiq → nat}/cli/commands/sizing/__init__.py +0 -0
- {aiq → nat}/cli/commands/sizing/sizing.py +0 -0
- {aiq → nat}/cli/commands/workflow/__init__.py +0 -0
- {aiq → nat}/cli/commands/workflow/templates/__init__.py.j2 +0 -0
- {aiq → nat}/cli/commands/workflow/templates/config.yml.j2 +0 -0
- {aiq → nat}/cli/commands/workflow/templates/register.py.j2 +0 -0
- {aiq → nat}/data_models/__init__.py +0 -0
- {aiq → nat}/data_models/common.py +0 -0
- {aiq → nat}/data_models/embedder.py +0 -0
- {aiq → nat}/data_models/evaluator.py +0 -0
- {aiq → nat}/data_models/front_end.py +0 -0
- {aiq → nat}/data_models/function.py +0 -0
- {aiq → nat}/data_models/function_dependencies.py +0 -0
- {aiq → nat}/data_models/interactive.py +0 -0
- {aiq → nat}/data_models/invocation_node.py +0 -0
- {aiq → nat}/data_models/llm.py +0 -0
- {aiq → nat}/data_models/logging.py +0 -0
- {aiq → nat}/data_models/memory.py +0 -0
- {aiq → nat}/data_models/object_store.py +0 -0
- {aiq → nat}/data_models/profiler.py +0 -0
- {aiq → nat}/data_models/registry_handler.py +0 -0
- {aiq → nat}/data_models/retry_mixin.py +0 -0
- {aiq → nat}/data_models/streaming.py +0 -0
- {aiq → nat}/data_models/swe_bench_model.py +0 -0
- {aiq → nat}/data_models/ttc_strategy.py +0 -0
- {aiq → nat}/embedder/__init__.py +0 -0
- {aiq → nat}/eval/__init__.py +0 -0
- {aiq → nat}/eval/dataset_handler/__init__.py +0 -0
- {aiq → nat}/eval/evaluator/__init__.py +0 -0
- {aiq → nat}/eval/rag_evaluator/__init__.py +0 -0
- {aiq → nat}/eval/register.py +0 -0
- {aiq → nat}/eval/runners/__init__.py +0 -0
- {aiq → nat}/eval/swe_bench_evaluator/__init__.py +0 -0
- {aiq → nat}/eval/trajectory_evaluator/__init__.py +0 -0
- {aiq → nat}/eval/tunable_rag_evaluator/__init__.py +0 -0
- {aiq → nat}/eval/usage_stats.py +0 -0
- {aiq → nat}/eval/utils/__init__.py +0 -0
- {aiq → nat}/eval/utils/tqdm_position_registry.py +0 -0
- {aiq → nat}/experimental/__init__.py +0 -0
- {aiq → nat}/experimental/decorators/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/editing/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/functions/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/models/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/models/stage_enums.py +0 -0
- {aiq → nat}/experimental/test_time_compute/models/tool_use_config.py +0 -0
- {aiq → nat}/experimental/test_time_compute/models/ttc_item.py +0 -0
- {aiq → nat}/experimental/test_time_compute/scoring/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/search/__init__.py +0 -0
- {aiq → nat}/experimental/test_time_compute/selection/__init__.py +0 -0
- {aiq → nat}/front_ends/__init__.py +0 -0
- {aiq → nat}/front_ends/console/__init__.py +0 -0
- {aiq → nat}/front_ends/cron/__init__.py +0 -0
- {aiq → nat}/front_ends/fastapi/__init__.py +0 -0
- {aiq → nat}/front_ends/fastapi/auth_flow_handlers/__init__.py +0 -0
- {aiq → nat}/front_ends/fastapi/fastapi_front_end_controller.py +0 -0
- {aiq → nat}/front_ends/fastapi/html_snippets/__init__.py +0 -0
- {aiq → nat}/front_ends/fastapi/html_snippets/auth_code_grant_success.py +0 -0
- {aiq → nat}/front_ends/fastapi/job_store.py +0 -0
- {aiq → nat}/front_ends/mcp/__init__.py +0 -0
- {aiq → nat}/front_ends/register.py +0 -0
- {aiq → nat}/front_ends/simple_base/__init__.py +0 -0
- {aiq → nat}/llm/__init__.py +0 -0
- {aiq → nat}/llm/register.py +0 -0
- {aiq → nat}/llm/utils/__init__.py +0 -0
- {aiq → nat}/llm/utils/env_config_value.py +0 -0
- {aiq → nat}/llm/utils/error.py +0 -0
- {aiq → nat}/memory/interfaces.py +0 -0
- {aiq → nat}/memory/models.py +0 -0
- {aiq → nat}/object_store/interfaces.py +0 -0
- {aiq → nat}/object_store/models.py +0 -0
- {aiq → nat}/object_store/register.py +0 -0
- {aiq → nat}/observability/__init__.py +0 -0
- {aiq → nat}/observability/exporter/__init__.py +0 -0
- {aiq → nat}/observability/mixin/__init__.py +0 -0
- {aiq → nat}/observability/mixin/batch_config_mixin.py +0 -0
- {aiq → nat}/observability/mixin/collector_config_mixin.py +0 -0
- {aiq → nat}/observability/mixin/file_mode.py +0 -0
- {aiq → nat}/observability/mixin/resource_conflict_mixin.py +0 -0
- {aiq → nat}/observability/mixin/serialize_mixin.py +0 -0
- {aiq → nat}/observability/mixin/type_introspection_mixin.py +0 -0
- {aiq → nat}/observability/processor/__init__.py +0 -0
- {aiq → nat}/observability/utils/__init__.py +0 -0
- {aiq → nat}/observability/utils/dict_utils.py +0 -0
- {aiq → nat}/observability/utils/time_utils.py +0 -0
- {aiq → nat}/plugins/.namespace +0 -0
- {aiq → nat}/profiler/__init__.py +0 -0
- {aiq → nat}/profiler/calc/__init__.py +0 -0
- {aiq → nat}/profiler/calc/data_models.py +0 -0
- {aiq → nat}/profiler/callbacks/__init__.py +0 -0
- {aiq → nat}/profiler/callbacks/base_callback_class.py +0 -0
- {aiq → nat}/profiler/callbacks/token_usage_base_model.py +0 -0
- {aiq → nat}/profiler/data_frame_row.py +0 -0
- {aiq → nat}/profiler/decorators/__init__.py +0 -0
- {aiq → nat}/profiler/forecasting/__init__.py +0 -0
- {aiq → nat}/profiler/forecasting/config.py +0 -0
- {aiq → nat}/profiler/forecasting/models/__init__.py +0 -0
- {aiq → nat}/profiler/forecasting/models/forecasting_base_model.py +0 -0
- {aiq → nat}/profiler/inference_metrics_model.py +0 -0
- {aiq → nat}/profiler/inference_optimization/__init__.py +0 -0
- {aiq → nat}/profiler/inference_optimization/bottleneck_analysis/__init__.py +0 -0
- {aiq → nat}/profiler/inference_optimization/data_models.py +0 -0
- {aiq → nat}/profiler/inference_optimization/experimental/__init__.py +0 -0
- {aiq → nat}/registry_handlers/__init__.py +0 -0
- {aiq → nat}/registry_handlers/local/__init__.py +0 -0
- {aiq → nat}/registry_handlers/pypi/__init__.py +0 -0
- {aiq → nat}/registry_handlers/register.py +0 -0
- {aiq → nat}/registry_handlers/rest/__init__.py +0 -0
- {aiq → nat}/registry_handlers/schemas/__init__.py +0 -0
- {aiq → nat}/registry_handlers/schemas/headers.py +0 -0
- {aiq → nat}/registry_handlers/schemas/status.py +0 -0
- {aiq → nat}/retriever/__init__.py +0 -0
- {aiq → nat}/retriever/milvus/__init__.py +0 -0
- {aiq → nat}/retriever/nemo_retriever/__init__.py +0 -0
- {aiq → nat}/runtime/__init__.py +0 -0
- {aiq → nat}/settings/__init__.py +0 -0
- {aiq → nat}/test/.namespace +0 -0
- {aiq → nat}/tool/__init__.py +0 -0
- {aiq → nat}/tool/code_execution/__init__.py +0 -0
- {aiq → nat}/tool/code_execution/local_sandbox/.gitignore +0 -0
- {aiq → nat}/tool/code_execution/local_sandbox/Dockerfile.sandbox +0 -0
- {aiq → nat}/tool/code_execution/local_sandbox/__init__.py +0 -0
- {aiq → nat}/tool/code_execution/local_sandbox/local_sandbox_server.py +0 -0
- {aiq → nat}/tool/code_execution/local_sandbox/sandbox.requirements.txt +0 -0
- {aiq → nat}/tool/code_execution/local_sandbox/start_local_sandbox.sh +0 -0
- {aiq → nat}/tool/code_execution/utils.py +0 -0
- {aiq → nat}/tool/github_tools/__init__.py +0 -0
- {aiq → nat}/tool/mcp/__init__.py +0 -0
- {aiq → nat}/tool/memory_tools/__init__.py +0 -0
- {aiq → nat}/tool/register.py +0 -0
- {aiq → nat}/utils/__init__.py +0 -0
- {aiq → nat}/utils/data_models/__init__.py +0 -0
- {aiq → nat}/utils/data_models/schema_validator.py +0 -0
- {aiq → nat}/utils/debugging_utils.py +0 -0
- {aiq → nat}/utils/exception_handlers/__init__.py +0 -0
- {aiq → nat}/utils/exception_handlers/automatic_retries.py +0 -0
- {aiq → nat}/utils/exception_handlers/schemas.py +0 -0
- {aiq → nat}/utils/io/__init__.py +0 -0
- {aiq → nat}/utils/io/model_processing.py +0 -0
- {aiq → nat}/utils/log_utils.py +0 -0
- {aiq → nat}/utils/optional_imports.py +0 -0
- {aiq → nat}/utils/producer_consumer_queue.py +0 -0
- {aiq → nat}/utils/reactive/__init__.py +0 -0
- {aiq → nat}/utils/reactive/base/__init__.py +0 -0
- {aiq → nat}/utils/reactive/base/observer_base.py +0 -0
- {aiq → nat}/utils/settings/__init__.py +0 -0
- {aiq → nat}/utils/string_utils.py +0 -0
- {aiq → nat}/utils/type_utils.py +0 -0
- {aiq → nat}/utils/url_utils.py +0 -0
- {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.2.0rc5.dist-info → nvidia_nat-1.2.0rc7.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -15,17 +15,19 @@
|
|
|
15
15
|
|
|
16
16
|
import json
|
|
17
17
|
import math
|
|
18
|
+
from pathlib import Path
|
|
18
19
|
|
|
19
20
|
import pandas as pd
|
|
20
21
|
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
from
|
|
22
|
+
from nat.data_models.dataset_handler import EvalDatasetConfig
|
|
23
|
+
from nat.data_models.dataset_handler import EvalDatasetCustomConfig
|
|
24
|
+
from nat.data_models.dataset_handler import EvalDatasetJsonConfig
|
|
25
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
26
|
+
from nat.data_models.intermediate_step import IntermediateStepType
|
|
27
|
+
from nat.eval.dataset_handler.dataset_downloader import DatasetDownloader
|
|
28
|
+
from nat.eval.dataset_handler.dataset_filter import DatasetFilter
|
|
29
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
30
|
+
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
29
31
|
|
|
30
32
|
|
|
31
33
|
class DatasetHandler:
|
|
@@ -38,9 +40,9 @@ class DatasetHandler:
|
|
|
38
40
|
dataset_config: EvalDatasetConfig,
|
|
39
41
|
reps: int,
|
|
40
42
|
concurrency: int,
|
|
41
|
-
num_passes: int
|
|
43
|
+
num_passes: int = 1,
|
|
42
44
|
adjust_dataset_size: bool = False):
|
|
43
|
-
from
|
|
45
|
+
from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
44
46
|
|
|
45
47
|
self.dataset_config = dataset_config
|
|
46
48
|
self.dataset_filter = DatasetFilter(dataset_config.filter)
|
|
@@ -184,6 +186,10 @@ class DatasetHandler:
|
|
|
184
186
|
# if a dataset file has been provided in the command line, use that
|
|
185
187
|
dataset_config = EvalDatasetJsonConfig(file_path=dataset) if dataset else self.dataset_config
|
|
186
188
|
|
|
189
|
+
# Handle custom dataset type with special processing
|
|
190
|
+
if isinstance(self.dataset_config, EvalDatasetCustomConfig):
|
|
191
|
+
return self._handle_custom_dataset(dataset)
|
|
192
|
+
|
|
187
193
|
# Download the dataset if it is remote
|
|
188
194
|
downloader = DatasetDownloader(dataset_config=dataset_config)
|
|
189
195
|
downloader.download_dataset()
|
|
@@ -192,6 +198,19 @@ class DatasetHandler:
|
|
|
192
198
|
# Parse the dataset into a DataFrame
|
|
193
199
|
input_df = parser(dataset_config.file_path, **kwargs)
|
|
194
200
|
|
|
201
|
+
# Apply standard preprocessing and convert to EvalInput
|
|
202
|
+
return self._preprocess_eval_dataframe(input_df)
|
|
203
|
+
|
|
204
|
+
def _preprocess_dataframe(self, input_df: pd.DataFrame) -> pd.DataFrame:
|
|
205
|
+
"""
|
|
206
|
+
Apply standard preprocessing to a DataFrame: filters, deduplication, repetitions, and size adjustment.
|
|
207
|
+
|
|
208
|
+
Args:
|
|
209
|
+
input_df: DataFrame to preprocess
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
Preprocessed DataFrame
|
|
213
|
+
"""
|
|
195
214
|
# Apply filters and deduplicate
|
|
196
215
|
input_df = self.dataset_filter.apply_filters(input_df)
|
|
197
216
|
input_df.drop_duplicates(subset=[self.dataset_config.id_key], inplace=True)
|
|
@@ -205,12 +224,104 @@ class DatasetHandler:
|
|
|
205
224
|
elif self.adjust_dataset_size:
|
|
206
225
|
input_df = self.adjust_dataset(input_df)
|
|
207
226
|
|
|
208
|
-
|
|
209
|
-
|
|
227
|
+
return input_df
|
|
228
|
+
|
|
229
|
+
def _preprocess_eval_dataframe(self, input_df: pd.DataFrame) -> EvalInput:
|
|
230
|
+
"""
|
|
231
|
+
Apply standard preprocessing to a DataFrame and convert to EvalInput.
|
|
232
|
+
|
|
233
|
+
Args:
|
|
234
|
+
input_df: DataFrame to preprocess
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
Preprocessed EvalInput object
|
|
238
|
+
"""
|
|
239
|
+
processed_df = self._preprocess_dataframe(input_df)
|
|
240
|
+
return self.get_eval_input_from_df(processed_df)
|
|
241
|
+
|
|
242
|
+
def _preprocess_eval_input(self, eval_input: EvalInput) -> EvalInput:
|
|
243
|
+
"""
|
|
244
|
+
Apply standard preprocessing to an EvalInput object.
|
|
245
|
+
|
|
246
|
+
Thin wrapper that converts EvalInput to DataFrame, processes it, and converts back.
|
|
247
|
+
|
|
248
|
+
Args:
|
|
249
|
+
eval_input: EvalInput object to preprocess
|
|
250
|
+
|
|
251
|
+
Returns:
|
|
252
|
+
Preprocessed EvalInput object
|
|
253
|
+
"""
|
|
254
|
+
if not eval_input.eval_input_items:
|
|
255
|
+
return eval_input
|
|
256
|
+
|
|
257
|
+
input_df = self._eval_input_to_dataframe(eval_input)
|
|
258
|
+
return self._preprocess_eval_dataframe(input_df)
|
|
259
|
+
|
|
260
|
+
def _handle_custom_dataset(self, dataset: str | None) -> EvalInput:
|
|
261
|
+
"""
|
|
262
|
+
Handle custom dataset type by calling the user-defined function
|
|
263
|
+
and applying standard preprocessing to the result.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
dataset: Optional dataset file path from command line
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
Preprocessed EvalInput object
|
|
270
|
+
"""
|
|
271
|
+
# Determine input path - use command line dataset or config file_path
|
|
272
|
+
input_path = Path(dataset) if dataset else Path(self.dataset_config.file_path)
|
|
273
|
+
|
|
274
|
+
# Download the dataset if it is remote (for custom datasets too)
|
|
275
|
+
downloader = DatasetDownloader(dataset_config=self.dataset_config)
|
|
276
|
+
downloader.download_dataset()
|
|
277
|
+
|
|
278
|
+
# Load and call custom function
|
|
279
|
+
custom_function, kwargs = self.dataset_config.parser()
|
|
280
|
+
|
|
281
|
+
try:
|
|
282
|
+
# Call the custom function with file_path and kwargs
|
|
283
|
+
eval_input = custom_function(file_path=input_path, **kwargs)
|
|
284
|
+
|
|
285
|
+
if not isinstance(eval_input, EvalInput):
|
|
286
|
+
raise ValueError(f"Custom function must return an EvalInput object, "
|
|
287
|
+
f"but returned {type(eval_input)}")
|
|
288
|
+
|
|
289
|
+
except Exception as e:
|
|
290
|
+
raise RuntimeError(f"Error calling custom dataset function: {e}") from e
|
|
291
|
+
|
|
292
|
+
# Apply standard preprocessing (filters, deduplication, repetitions)
|
|
293
|
+
return self._preprocess_eval_input(eval_input)
|
|
294
|
+
|
|
295
|
+
def _eval_input_to_dataframe(self, eval_input: EvalInput) -> pd.DataFrame:
|
|
296
|
+
"""
|
|
297
|
+
Convert an EvalInput object to a pandas DataFrame for processing.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
eval_input: EvalInput object to convert
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
DataFrame representation of the EvalInput
|
|
304
|
+
"""
|
|
305
|
+
data = []
|
|
306
|
+
for item in eval_input.eval_input_items:
|
|
307
|
+
row = item.full_dataset_entry.copy() if item.full_dataset_entry else {}
|
|
308
|
+
|
|
309
|
+
# Ensure key fields are present
|
|
310
|
+
row[self.id_key] = item.id
|
|
311
|
+
if self.is_structured_input():
|
|
312
|
+
row[self.question_key] = item.input_obj
|
|
313
|
+
row[self.answer_key] = item.expected_output_obj
|
|
314
|
+
row[self.generated_answer_key] = item.output_obj
|
|
315
|
+
row[self.trajectory_key] = item.trajectory
|
|
316
|
+
row[self.expected_trajectory_key] = item.expected_trajectory
|
|
317
|
+
|
|
318
|
+
data.append(row)
|
|
319
|
+
|
|
320
|
+
return pd.DataFrame(data)
|
|
210
321
|
|
|
211
322
|
def filter_intermediate_steps(self,
|
|
212
323
|
intermediate_steps: list[IntermediateStep],
|
|
213
|
-
event_filter: list[IntermediateStepType] = None) -> list[dict]:
|
|
324
|
+
event_filter: list[IntermediateStepType] | None = None) -> list[dict]:
|
|
214
325
|
"""
|
|
215
326
|
Filter out the intermediate steps that are not relevant for evaluation.
|
|
216
327
|
The output is written with with the intention of re-running the evaluation using the original config file.
|
|
@@ -220,7 +331,9 @@ class DatasetHandler:
|
|
|
220
331
|
filtered_steps = self.intermediate_step_adapter.filter_intermediate_steps(intermediate_steps, event_filter)
|
|
221
332
|
return self.intermediate_step_adapter.serialize_intermediate_steps(filtered_steps)
|
|
222
333
|
|
|
223
|
-
def publish_eval_input(self,
|
|
334
|
+
def publish_eval_input(self,
|
|
335
|
+
eval_input,
|
|
336
|
+
workflow_output_step_filter: list[IntermediateStepType] | None = None) -> str:
|
|
224
337
|
"""
|
|
225
338
|
Convert the EvalInput object to a JSON output for storing in a file. Use the orginal keys to
|
|
226
339
|
allow re-running evaluation using the orignal config file and '--skip_workflow' option.
|
{aiq → nat}/eval/evaluate.py
RENAMED
|
@@ -23,21 +23,21 @@ from uuid import uuid4
|
|
|
23
23
|
from pydantic import BaseModel
|
|
24
24
|
from tqdm import tqdm
|
|
25
25
|
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from
|
|
31
|
-
from
|
|
32
|
-
from
|
|
33
|
-
from
|
|
34
|
-
from
|
|
35
|
-
from
|
|
36
|
-
from
|
|
37
|
-
from
|
|
38
|
-
from
|
|
39
|
-
from
|
|
40
|
-
from
|
|
26
|
+
from nat.data_models.evaluate import EvalConfig
|
|
27
|
+
from nat.data_models.evaluate import JobEvictionPolicy
|
|
28
|
+
from nat.eval.config import EvaluationRunConfig
|
|
29
|
+
from nat.eval.config import EvaluationRunOutput
|
|
30
|
+
from nat.eval.dataset_handler.dataset_handler import DatasetHandler
|
|
31
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
32
|
+
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
33
|
+
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
34
|
+
from nat.eval.usage_stats import UsageStats
|
|
35
|
+
from nat.eval.usage_stats import UsageStatsItem
|
|
36
|
+
from nat.eval.usage_stats import UsageStatsLLM
|
|
37
|
+
from nat.eval.utils.output_uploader import OutputUploader
|
|
38
|
+
from nat.eval.utils.weave_eval import WeaveEvaluationIntegration
|
|
39
|
+
from nat.profiler.data_models import ProfilerResults
|
|
40
|
+
from nat.runtime.session import SessionManager
|
|
41
41
|
|
|
42
42
|
logger = logging.getLogger(__name__)
|
|
43
43
|
|
|
@@ -45,13 +45,17 @@ logger = logging.getLogger(__name__)
|
|
|
45
45
|
class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
46
46
|
"""
|
|
47
47
|
Instantiated for each evaluation run and used to store data for that single run.
|
|
48
|
+
|
|
49
|
+
.. warning::
|
|
50
|
+
**Experimental Feature**: The Evaluation API is experimental and may change in future releases.
|
|
51
|
+
Future versions may introduce breaking changes without notice.
|
|
48
52
|
"""
|
|
49
53
|
|
|
50
54
|
def __init__(self, config: EvaluationRunConfig):
|
|
51
55
|
"""
|
|
52
56
|
Initialize an EvaluationRun with configuration.
|
|
53
57
|
"""
|
|
54
|
-
from
|
|
58
|
+
from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
55
59
|
|
|
56
60
|
# Run-specific configuration
|
|
57
61
|
self.config: EvaluationRunConfig = config
|
|
@@ -79,7 +83,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
79
83
|
def _compute_usage_stats(self, item: EvalInputItem):
|
|
80
84
|
"""Compute usage stats for a single item using the intermediate steps"""
|
|
81
85
|
# get the prompt and completion tokens from the intermediate steps
|
|
82
|
-
from
|
|
86
|
+
from nat.profiler.intermediate_property_adapter import IntermediatePropertyAdaptor
|
|
83
87
|
steps = [IntermediatePropertyAdaptor.from_intermediate_step(step) for step in item.trajectory]
|
|
84
88
|
usage_stats_per_llm = {}
|
|
85
89
|
total_tokens = 0
|
|
@@ -129,14 +133,14 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
129
133
|
llm_latency=llm_latency)
|
|
130
134
|
return self.usage_stats.usage_stats_items[item.id]
|
|
131
135
|
|
|
132
|
-
async def run_workflow_local(self, session_manager:
|
|
136
|
+
async def run_workflow_local(self, session_manager: SessionManager):
|
|
133
137
|
'''
|
|
134
138
|
Launch the workflow with the specified questions and extract the output using the jsonpath
|
|
135
139
|
'''
|
|
136
140
|
# import function level dependencies
|
|
137
141
|
from jsonpath_ng import parse
|
|
138
142
|
|
|
139
|
-
from
|
|
143
|
+
from nat.eval.runtime_event_subscriber import pull_intermediate
|
|
140
144
|
|
|
141
145
|
# Run the workflow
|
|
142
146
|
jsonpath_expr = parse(self.config.result_json_path)
|
|
@@ -220,7 +224,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
220
224
|
pbar.close()
|
|
221
225
|
|
|
222
226
|
async def run_workflow_remote(self):
|
|
223
|
-
from
|
|
227
|
+
from nat.eval.remote_workflow import EvaluationRemoteWorkflowHandler
|
|
224
228
|
handler = EvaluationRemoteWorkflowHandler(self.config, self.eval_config.general.max_concurrency)
|
|
225
229
|
await handler.run_workflow_remote(self.eval_input)
|
|
226
230
|
for item in self.eval_input.eval_input_items:
|
|
@@ -237,7 +241,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
237
241
|
logger.info("Profiler is not enabled. Skipping profiling.")
|
|
238
242
|
return ProfilerResults()
|
|
239
243
|
|
|
240
|
-
from
|
|
244
|
+
from nat.profiler.profile_runner import ProfilerRunner
|
|
241
245
|
|
|
242
246
|
all_stats = []
|
|
243
247
|
for input_item in self.eval_input.eval_input_items:
|
|
@@ -306,7 +310,7 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
306
310
|
except Exception as e:
|
|
307
311
|
logger.exception("Failed to delete old job directory: %s: %s", dir_to_delete, e, exc_info=True)
|
|
308
312
|
|
|
309
|
-
def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults):
|
|
313
|
+
def write_output(self, dataset_handler: DatasetHandler, profiler_results: ProfilerResults): # pylint: disable=unused-argument # noqa: E501
|
|
310
314
|
workflow_output_file = self.eval_config.general.output_dir / "workflow_output.json"
|
|
311
315
|
workflow_output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
312
316
|
|
|
@@ -374,17 +378,17 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
374
378
|
await self.weave_eval.afinish_loggers()
|
|
375
379
|
|
|
376
380
|
def apply_overrides(self):
|
|
377
|
-
from
|
|
378
|
-
from
|
|
379
|
-
from
|
|
380
|
-
from
|
|
381
|
-
from
|
|
381
|
+
from nat.cli.cli_utils.config_override import load_and_override_config
|
|
382
|
+
from nat.data_models.config import Config
|
|
383
|
+
from nat.runtime.loader import PluginTypes
|
|
384
|
+
from nat.runtime.loader import discover_and_register_plugins
|
|
385
|
+
from nat.utils.data_models.schema_validator import validate_schema
|
|
382
386
|
|
|
383
387
|
# Register plugins before validation
|
|
384
388
|
discover_and_register_plugins(PluginTypes.CONFIG_OBJECT)
|
|
385
389
|
|
|
386
390
|
config_dict = load_and_override_config(self.config.config_file, self.config.override)
|
|
387
|
-
config = validate_schema(config_dict,
|
|
391
|
+
config = validate_schema(config_dict, Config)
|
|
388
392
|
return config
|
|
389
393
|
|
|
390
394
|
def _get_workflow_alias(self, workflow_type: str | None = None):
|
|
@@ -393,20 +397,20 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
393
397
|
return self.eval_config.general.workflow_alias
|
|
394
398
|
|
|
395
399
|
if not workflow_type or workflow_type == "EmptyFunctionConfig":
|
|
396
|
-
return "
|
|
400
|
+
return "nat-eval"
|
|
397
401
|
|
|
398
402
|
return workflow_type
|
|
399
403
|
|
|
400
404
|
async def run_and_evaluate(self,
|
|
401
|
-
session_manager:
|
|
405
|
+
session_manager: SessionManager | None = None,
|
|
402
406
|
job_id: str | None = None) -> EvaluationRunOutput:
|
|
403
407
|
"""
|
|
404
408
|
Run the workflow with the specified config file and evaluate the dataset
|
|
405
409
|
"""
|
|
406
410
|
logger.info("Starting evaluation run with config file: %s", self.config.config_file)
|
|
407
411
|
|
|
408
|
-
from
|
|
409
|
-
from
|
|
412
|
+
from nat.builder.eval_builder import WorkflowEvalBuilder
|
|
413
|
+
from nat.runtime.loader import load_config
|
|
410
414
|
|
|
411
415
|
# Load and override the config
|
|
412
416
|
if self.config.override:
|
|
@@ -469,8 +473,8 @@ class EvaluationRun: # pylint: disable=too-many-public-methods
|
|
|
469
473
|
else:
|
|
470
474
|
if not self.config.skip_workflow:
|
|
471
475
|
if session_manager is None:
|
|
472
|
-
session_manager =
|
|
473
|
-
|
|
476
|
+
session_manager = SessionManager(eval_workflow.build(),
|
|
477
|
+
max_concurrency=self.eval_config.general.max_concurrency)
|
|
474
478
|
await self.run_workflow_local(session_manager)
|
|
475
479
|
|
|
476
480
|
# Evaluate
|
|
@@ -19,17 +19,21 @@ from abc import abstractmethod
|
|
|
19
19
|
|
|
20
20
|
from tqdm import tqdm
|
|
21
21
|
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
22
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
23
|
+
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
24
|
+
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
25
|
+
from nat.eval.evaluator.evaluator_model import EvalOutputItem
|
|
26
|
+
from nat.eval.utils.tqdm_position_registry import TqdmPositionRegistry
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class BaseEvaluator(ABC):
|
|
30
30
|
"""
|
|
31
31
|
Base class for custom evaluators.
|
|
32
32
|
|
|
33
|
+
.. warning::
|
|
34
|
+
**Experimental Feature**: The Evaluation API is experimental and may change in future releases.
|
|
35
|
+
Future versions may introduce breaking changes without notice.
|
|
36
|
+
|
|
33
37
|
Each custom evaluator must implement the `evaluate_item` method which is used to evaluate a
|
|
34
38
|
single EvalInputItem.
|
|
35
39
|
"""
|
|
@@ -17,16 +17,16 @@ import typing
|
|
|
17
17
|
|
|
18
18
|
from pydantic import BaseModel
|
|
19
19
|
|
|
20
|
-
from
|
|
20
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class EvalInputItem(BaseModel):
|
|
24
24
|
id: typing.Any
|
|
25
25
|
input_obj: typing.Any
|
|
26
26
|
expected_output_obj: typing.Any
|
|
27
|
-
output_obj: typing.Any
|
|
28
|
-
expected_trajectory: list[IntermediateStep]
|
|
29
|
-
trajectory: list[IntermediateStep]
|
|
27
|
+
output_obj: typing.Any = None # populated by the workflow
|
|
28
|
+
expected_trajectory: list[IntermediateStep] = []
|
|
29
|
+
trajectory: list[IntermediateStep] = [] # populated by the workflow
|
|
30
30
|
full_dataset_entry: typing.Any
|
|
31
31
|
|
|
32
32
|
|
|
@@ -17,8 +17,8 @@ import logging
|
|
|
17
17
|
|
|
18
18
|
from langchain_core.agents import AgentAction
|
|
19
19
|
|
|
20
|
-
from
|
|
21
|
-
from
|
|
20
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
21
|
+
from nat.data_models.intermediate_step import IntermediateStepType
|
|
22
22
|
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
@@ -25,12 +25,12 @@ from ragas.llms import LangchainLLMWrapper
|
|
|
25
25
|
from ragas.metrics import Metric
|
|
26
26
|
from tqdm import tqdm
|
|
27
27
|
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from
|
|
31
|
-
from
|
|
32
|
-
from
|
|
33
|
-
from
|
|
28
|
+
from nat.data_models.intermediate_step import IntermediateStepType
|
|
29
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
30
|
+
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
31
|
+
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
32
|
+
from nat.eval.evaluator.evaluator_model import EvalOutputItem
|
|
33
|
+
from nat.eval.utils.tqdm_position_registry import TqdmPositionRegistry
|
|
34
34
|
|
|
35
35
|
logger = logging.getLogger(__name__)
|
|
36
36
|
|
|
@@ -68,7 +68,7 @@ class RAGEvaluator:
|
|
|
68
68
|
|
|
69
69
|
def eval_input_to_ragas(self, eval_input: EvalInput) -> EvaluationDataset:
|
|
70
70
|
"""Converts EvalInput into a Ragas-compatible EvaluationDataset."""
|
|
71
|
-
from
|
|
71
|
+
from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
72
72
|
event_filter = [IntermediateStepType.TOOL_END, IntermediateStepType.LLM_END, IntermediateStepType.CUSTOM_END]
|
|
73
73
|
samples = []
|
|
74
74
|
|
|
@@ -99,7 +99,7 @@ class RAGEvaluator:
|
|
|
99
99
|
return EvaluationDataset(samples=samples)
|
|
100
100
|
|
|
101
101
|
def ragas_to_eval_output(self, eval_input: EvalInput, results_dataset: EvaluationResult | None) -> EvalOutput:
|
|
102
|
-
"""Converts the ragas EvaluationResult to
|
|
102
|
+
"""Converts the ragas EvaluationResult to nat EvalOutput"""
|
|
103
103
|
|
|
104
104
|
if not results_dataset:
|
|
105
105
|
logger.error("Ragas evaluation failed with no results")
|
|
@@ -19,13 +19,13 @@ from pydantic import BaseModel
|
|
|
19
19
|
from pydantic import Field
|
|
20
20
|
from pydantic import model_validator
|
|
21
21
|
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
from
|
|
22
|
+
from nat.builder.builder import EvalBuilder
|
|
23
|
+
from nat.builder.evaluator import EvaluatorInfo
|
|
24
|
+
from nat.builder.framework_enum import LLMFrameworkEnum
|
|
25
|
+
from nat.cli.register_workflow import register_evaluator
|
|
26
|
+
from nat.data_models.evaluator import EvaluatorBaseConfig
|
|
27
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
28
|
+
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
29
29
|
|
|
30
30
|
logger = logging.getLogger(__name__)
|
|
31
31
|
|
|
@@ -21,13 +21,13 @@ import aiohttp
|
|
|
21
21
|
from pydantic import ValidationError
|
|
22
22
|
from tqdm import tqdm
|
|
23
23
|
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
from
|
|
29
|
-
from
|
|
30
|
-
from
|
|
24
|
+
from nat.data_models.api_server import ResponseIntermediateStep
|
|
25
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
26
|
+
from nat.data_models.intermediate_step import IntermediateStepPayload
|
|
27
|
+
from nat.data_models.invocation_node import InvocationNode
|
|
28
|
+
from nat.eval.config import EvaluationRunConfig
|
|
29
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
30
|
+
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
31
31
|
|
|
32
32
|
logger = logging.getLogger(__name__)
|
|
33
33
|
|
|
@@ -80,7 +80,7 @@ class EvaluationRemoteWorkflowHandler:
|
|
|
80
80
|
# This is an intermediate step
|
|
81
81
|
try:
|
|
82
82
|
step_data = json.loads(line[len(INTERMEDIATE_DATA_PREFIX):])
|
|
83
|
-
response_intermediate =
|
|
83
|
+
response_intermediate = ResponseIntermediateStep.model_validate(step_data)
|
|
84
84
|
# The payload is expected to be IntermediateStepPayload
|
|
85
85
|
payload = IntermediateStepPayload.model_validate_json(response_intermediate.payload)
|
|
86
86
|
intermediate_step = IntermediateStep(parent_id="remote",
|
|
@@ -17,8 +17,8 @@ import typing
|
|
|
17
17
|
|
|
18
18
|
from pydantic import BaseModel
|
|
19
19
|
|
|
20
|
-
from
|
|
21
|
-
from
|
|
20
|
+
from nat.eval.config import EvaluationRunConfig
|
|
21
|
+
from nat.eval.config import EvaluationRunOutput
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class MultiEvaluationRunConfig(BaseModel):
|
|
@@ -16,10 +16,10 @@
|
|
|
16
16
|
import copy
|
|
17
17
|
import typing
|
|
18
18
|
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
from
|
|
19
|
+
from nat.eval.config import EvaluationRunConfig
|
|
20
|
+
from nat.eval.config import EvaluationRunOutput
|
|
21
|
+
from nat.eval.evaluate import EvaluationRun
|
|
22
|
+
from nat.eval.runners.config import MultiEvaluationRunConfig
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class MultiEvaluationRunner:
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
import asyncio
|
|
17
17
|
import logging
|
|
18
18
|
|
|
19
|
-
from
|
|
20
|
-
from
|
|
19
|
+
from nat.builder.context import Context
|
|
20
|
+
from nat.data_models.intermediate_step import IntermediateStep
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
@@ -30,7 +30,7 @@ def pull_intermediate() -> asyncio.Future[list[dict]]:
|
|
|
30
30
|
"""
|
|
31
31
|
future = asyncio.Future()
|
|
32
32
|
intermediate_steps = [] # We'll store the dumped steps here.
|
|
33
|
-
context =
|
|
33
|
+
context = Context.get()
|
|
34
34
|
|
|
35
35
|
def on_next_cb(item: IntermediateStep):
|
|
36
36
|
# Append each new intermediate step (dumped to dict) to the list.
|
|
@@ -19,10 +19,10 @@ import os
|
|
|
19
19
|
import shutil
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
22
|
+
from nat.data_models.swe_bench_model import SWEBenchInput
|
|
23
|
+
from nat.data_models.swe_bench_model import SWEBenchOutput
|
|
24
|
+
from nat.eval.evaluator.evaluator_model import EvalInput
|
|
25
|
+
from nat.eval.evaluator.evaluator_model import EvalOutput
|
|
26
26
|
|
|
27
27
|
try:
|
|
28
28
|
import swebench.harness.run_evaluation as swebench_eval
|
|
@@ -123,7 +123,7 @@ class SweBenchEvaluator:
|
|
|
123
123
|
for s in swebench_inputs if s not in supported_inputs})
|
|
124
124
|
|
|
125
125
|
# Write SWEBenchInput to file
|
|
126
|
-
workflow_input_file = self.output_dir / "
|
|
126
|
+
workflow_input_file = self.output_dir / "nat_workflow_input.json"
|
|
127
127
|
workflow_input_file.parent.mkdir(parents=True, exist_ok=True)
|
|
128
128
|
Path(workflow_input_file).write_text(json.dumps([swebench.model_dump() for swebench in supported_inputs],
|
|
129
129
|
indent=2),
|
|
@@ -139,7 +139,7 @@ class SweBenchEvaluator:
|
|
|
139
139
|
return None, None
|
|
140
140
|
|
|
141
141
|
# Write SWEBenchOutput to file
|
|
142
|
-
workflow_output_file = self.output_dir / "
|
|
142
|
+
workflow_output_file = self.output_dir / "nat_workflow_output.json"
|
|
143
143
|
Path(workflow_output_file).write_text(json.dumps([output.model_dump() for output in filtered_outputs],
|
|
144
144
|
indent=2),
|
|
145
145
|
encoding="utf-8")
|
|
@@ -15,10 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
from pydantic import Field
|
|
17
17
|
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
18
|
+
from nat.builder.builder import EvalBuilder
|
|
19
|
+
from nat.builder.evaluator import EvaluatorInfo
|
|
20
|
+
from nat.cli.register_workflow import register_evaluator
|
|
21
|
+
from nat.data_models.evaluator import EvaluatorBaseConfig
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class SweBenchEvaluatorConfig(EvaluatorBaseConfig, name="swe_bench"):
|
|
@@ -19,9 +19,9 @@ from langchain.evaluation import TrajectoryEvalChain
|
|
|
19
19
|
from langchain_core.language_models import BaseChatModel
|
|
20
20
|
from langchain_core.tools import BaseTool
|
|
21
21
|
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
22
|
+
from nat.eval.evaluator.base_evaluator import BaseEvaluator
|
|
23
|
+
from nat.eval.evaluator.evaluator_model import EvalInputItem
|
|
24
|
+
from nat.eval.evaluator.evaluator_model import EvalOutputItem
|
|
25
25
|
|
|
26
26
|
logger = logging.getLogger(__name__)
|
|
27
27
|
|
|
@@ -48,8 +48,8 @@ class TrajectoryEvaluator(BaseEvaluator):
|
|
|
48
48
|
"""
|
|
49
49
|
Evaluate a single EvalInputItem and return an EvalOutputItem.
|
|
50
50
|
"""
|
|
51
|
-
from
|
|
52
|
-
from
|
|
51
|
+
from nat.data_models.intermediate_step import IntermediateStepType
|
|
52
|
+
from nat.eval.intermediate_step_adapter import IntermediateStepAdapter
|
|
53
53
|
|
|
54
54
|
intermediate_step_adapter = IntermediateStepAdapter()
|
|
55
55
|
event_filter = [IntermediateStepType.LLM_END, IntermediateStepType.TOOL_END]
|
|
@@ -15,10 +15,10 @@
|
|
|
15
15
|
|
|
16
16
|
from pydantic import Field
|
|
17
17
|
|
|
18
|
-
from
|
|
19
|
-
from
|
|
20
|
-
from
|
|
21
|
-
from
|
|
18
|
+
from nat.builder.builder import EvalBuilder
|
|
19
|
+
from nat.builder.evaluator import EvaluatorInfo
|
|
20
|
+
from nat.cli.register_workflow import register_evaluator
|
|
21
|
+
from nat.data_models.evaluator import EvaluatorBaseConfig
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class TrajectoryEvaluatorConfig(EvaluatorBaseConfig, name="trajectory"):
|
|
@@ -29,7 +29,7 @@ class TrajectoryEvaluatorConfig(EvaluatorBaseConfig, name="trajectory"):
|
|
|
29
29
|
|
|
30
30
|
@register_evaluator(config_type=TrajectoryEvaluatorConfig)
|
|
31
31
|
async def register_trajectory_evaluator(config: TrajectoryEvaluatorConfig, builder: EvalBuilder):
|
|
32
|
-
from
|
|
32
|
+
from nat.builder.framework_enum import LLMFrameworkEnum
|
|
33
33
|
|
|
34
34
|
from .evaluate import TrajectoryEvaluator
|
|
35
35
|
llm = await builder.get_llm(config.llm_name, wrapper_type=LLMFrameworkEnum.LANGCHAIN)
|