eval-protocol 0.2.41__tar.gz → 0.2.43__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.41/eval_protocol.egg-info → eval_protocol-0.2.43}/PKG-INFO +1 -1
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/fireworks_tracing.py +1 -3
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/auth.py +92 -28
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli.py +109 -20
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/upload.py +171 -30
- eval_protocol-0.2.43/eval_protocol/directory_utils.py +39 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/evaluation.py +17 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/platform_api.py +20 -9
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/remote_rollout_processor.py +22 -3
- {eval_protocol-0.2.41 → eval_protocol-0.2.43/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/SOURCES.txt +4 -2
- eval_protocol-0.2.43/tests/test_directory_utils.py +95 -0
- eval_protocol-0.2.43/tests/test_upload_entrypoint.py +227 -0
- eval_protocol-0.2.41/vite-app/dist/assets/index-D3tKqxWU.js → eval_protocol-0.2.43/vite-app/dist/assets/index-C81y9r9l.js +2 -2
- eval_protocol-0.2.41/vite-app/dist/assets/index-D3tKqxWU.js.map → eval_protocol-0.2.43/vite-app/dist/assets/index-C81y9r9l.js.map +1 -1
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/index.html +1 -1
- eval_protocol-0.2.41/eval_protocol/directory_utils.py +0 -55
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/LICENSE +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/README.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/pyproject.toml +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/setup.cfg +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/setup.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_config.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_format.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_length.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_math.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_models.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/versioneer.py +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
- {eval_protocol-0.2.41 → eval_protocol-0.2.43}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.41
|
|
3
|
+
Version: 0.2.43
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-08T08:52:41-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.41"
|
|
14
|
+
"full-revisionid": "535169e7193e6500d8d323e7dbc31c14dca98b96",
|
|
15
|
+
"version": "0.2.43"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -371,9 +371,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
371
371
|
error_msg = error_detail or e.response.text
|
|
372
372
|
|
|
373
373
|
# Retry on 404 if it's due to incomplete/missing traces (backend still indexing)
|
|
374
|
-
if e.response.status_code == 404 and (
|
|
375
|
-
"Incomplete traces" in error_detail or "No traces found" in error_detail
|
|
376
|
-
):
|
|
374
|
+
if e.response.status_code == 404:
|
|
377
375
|
should_retry = True
|
|
378
376
|
except Exception:
|
|
379
377
|
error_msg = e.response.text
|
|
@@ -6,10 +6,48 @@ from typing import Dict, Optional # Added Dict
|
|
|
6
6
|
|
|
7
7
|
logger = logging.getLogger(__name__)
|
|
8
8
|
|
|
9
|
+
# Default locations (used for tests and as fallback). Actual resolution is dynamic via _get_auth_ini_file().
|
|
9
10
|
FIREWORKS_CONFIG_DIR = Path.home() / ".fireworks"
|
|
10
11
|
AUTH_INI_FILE = FIREWORKS_CONFIG_DIR / "auth.ini"
|
|
11
12
|
|
|
12
13
|
|
|
14
|
+
def _get_profile_base_dir() -> Path:
|
|
15
|
+
"""
|
|
16
|
+
Resolve the Fireworks configuration base directory following firectl behavior:
|
|
17
|
+
- Default: ~/.fireworks
|
|
18
|
+
- If FIREWORKS_PROFILE is set and non-empty: ~/.fireworks/profiles/<profile>
|
|
19
|
+
"""
|
|
20
|
+
profile_name = os.environ.get("FIREWORKS_PROFILE", "").strip()
|
|
21
|
+
base_dir = Path.home() / ".fireworks"
|
|
22
|
+
if profile_name:
|
|
23
|
+
base_dir = base_dir / "profiles" / profile_name
|
|
24
|
+
return base_dir
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_auth_ini_file() -> Path:
|
|
28
|
+
"""
|
|
29
|
+
Determine the auth.ini file path.
|
|
30
|
+
Priority:
|
|
31
|
+
1) FIREWORKS_AUTH_FILE env var when set
|
|
32
|
+
2) ~/.fireworks[/profiles/<profile>]/auth.ini (profile driven)
|
|
33
|
+
"""
|
|
34
|
+
auth_file_env = os.environ.get("FIREWORKS_AUTH_FILE")
|
|
35
|
+
if auth_file_env:
|
|
36
|
+
return Path(auth_file_env)
|
|
37
|
+
return _get_profile_base_dir() / "auth.ini"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _is_profile_active() -> bool:
|
|
41
|
+
"""
|
|
42
|
+
Returns True if a specific profile or explicit auth file is active.
|
|
43
|
+
In this case, profile-based credentials should take precedence over env vars.
|
|
44
|
+
"""
|
|
45
|
+
if os.environ.get("FIREWORKS_AUTH_FILE"):
|
|
46
|
+
return True
|
|
47
|
+
prof = os.environ.get("FIREWORKS_PROFILE", "").strip()
|
|
48
|
+
return bool(prof)
|
|
49
|
+
|
|
50
|
+
|
|
13
51
|
def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
|
|
14
52
|
"""
|
|
15
53
|
Parses an auth file with simple key=value lines.
|
|
@@ -20,7 +58,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
|
|
|
20
58
|
if not file_path.exists():
|
|
21
59
|
return creds
|
|
22
60
|
try:
|
|
23
|
-
with open(file_path, "r") as f:
|
|
61
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
|
24
62
|
for line in f:
|
|
25
63
|
line = line.strip()
|
|
26
64
|
if not line or line.startswith("#") or line.startswith(";"):
|
|
@@ -39,7 +77,7 @@ def _parse_simple_auth_file(file_path: Path) -> Dict[str, str]:
|
|
|
39
77
|
if key in ["api_key", "account_id"] and value:
|
|
40
78
|
creds[key] = value
|
|
41
79
|
except Exception as e:
|
|
42
|
-
logger.warning(
|
|
80
|
+
logger.warning("Error during simple parsing of %s: %s", str(file_path), e)
|
|
43
81
|
return creds
|
|
44
82
|
|
|
45
83
|
|
|
@@ -48,13 +86,14 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
|
|
|
48
86
|
Helper to get a specific credential (api_key or account_id) from auth.ini.
|
|
49
87
|
Tries simple parsing first, then configparser.
|
|
50
88
|
"""
|
|
51
|
-
|
|
89
|
+
auth_ini_path = _get_auth_ini_file()
|
|
90
|
+
if not auth_ini_path.exists():
|
|
52
91
|
return None
|
|
53
92
|
|
|
54
93
|
# 1. Try simple key-value parsing first
|
|
55
|
-
simple_creds = _parse_simple_auth_file(
|
|
94
|
+
simple_creds = _parse_simple_auth_file(auth_ini_path)
|
|
56
95
|
if key_name in simple_creds:
|
|
57
|
-
logger.debug(
|
|
96
|
+
logger.debug("Using %s from simple key-value parsing of %s.", key_name, str(auth_ini_path))
|
|
58
97
|
return simple_creds[key_name]
|
|
59
98
|
|
|
60
99
|
# 2. Fallback to configparser if not found via simple parsing or if simple parsing failed
|
|
@@ -62,30 +101,35 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
|
|
|
62
101
|
# but only if simple parsing didn't yield the key.
|
|
63
102
|
try:
|
|
64
103
|
config = configparser.ConfigParser()
|
|
65
|
-
config.read(
|
|
104
|
+
config.read(auth_ini_path)
|
|
66
105
|
|
|
67
106
|
# Try [fireworks] section
|
|
68
107
|
if "fireworks" in config and config.has_option("fireworks", key_name):
|
|
69
108
|
value_from_file = config.get("fireworks", key_name)
|
|
70
109
|
if value_from_file:
|
|
71
|
-
logger.debug(
|
|
110
|
+
logger.debug("Using %s from [fireworks] section in %s.", key_name, str(auth_ini_path))
|
|
72
111
|
return value_from_file
|
|
73
112
|
|
|
74
113
|
# Try default section (configparser might place items without section header here)
|
|
75
114
|
if config.has_option(config.default_section, key_name):
|
|
76
115
|
value_from_default = config.get(config.default_section, key_name)
|
|
77
116
|
if value_from_default:
|
|
78
|
-
logger.debug(
|
|
117
|
+
logger.debug(
|
|
118
|
+
"Using %s from default section [%s] in %s.",
|
|
119
|
+
key_name,
|
|
120
|
+
config.default_section,
|
|
121
|
+
str(auth_ini_path),
|
|
122
|
+
)
|
|
79
123
|
return value_from_default
|
|
80
124
|
|
|
81
125
|
except configparser.MissingSectionHeaderError:
|
|
82
126
|
# This error implies the file is purely key-value, which simple parsing should have handled.
|
|
83
127
|
# If simple parsing failed to get the key, then it's likely not there or malformed.
|
|
84
|
-
logger.debug(
|
|
128
|
+
logger.debug("%s has no section headers, and simple parsing did not find %s.", str(auth_ini_path), key_name)
|
|
85
129
|
except configparser.Error as e_config:
|
|
86
|
-
logger.warning(
|
|
130
|
+
logger.warning("Configparser error reading %s for %s: %s", str(auth_ini_path), key_name, e_config)
|
|
87
131
|
except Exception as e_general:
|
|
88
|
-
logger.warning(
|
|
132
|
+
logger.warning("Unexpected error reading %s for %s: %s", str(auth_ini_path), key_name, e_general)
|
|
89
133
|
|
|
90
134
|
return None
|
|
91
135
|
|
|
@@ -101,14 +145,24 @@ def get_fireworks_api_key() -> Optional[str]:
|
|
|
101
145
|
Returns:
|
|
102
146
|
The API key if found, otherwise None.
|
|
103
147
|
"""
|
|
104
|
-
|
|
105
|
-
if
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
148
|
+
# If a profile is active, prefer profile file first, then env
|
|
149
|
+
if _is_profile_active():
|
|
150
|
+
api_key_from_file = _get_credential_from_config_file("api_key")
|
|
151
|
+
if api_key_from_file:
|
|
152
|
+
return api_key_from_file
|
|
153
|
+
api_key = os.environ.get("FIREWORKS_API_KEY")
|
|
154
|
+
if api_key:
|
|
155
|
+
logger.debug("Using FIREWORKS_API_KEY from environment variable (profile active but file missing).")
|
|
156
|
+
return api_key
|
|
157
|
+
else:
|
|
158
|
+
# Default behavior: env overrides file
|
|
159
|
+
api_key = os.environ.get("FIREWORKS_API_KEY")
|
|
160
|
+
if api_key:
|
|
161
|
+
logger.debug("Using FIREWORKS_API_KEY from environment variable.")
|
|
162
|
+
return api_key
|
|
163
|
+
api_key_from_file = _get_credential_from_config_file("api_key")
|
|
164
|
+
if api_key_from_file:
|
|
165
|
+
return api_key_from_file
|
|
112
166
|
|
|
113
167
|
logger.debug("Fireworks API key not found in environment variables or auth.ini.")
|
|
114
168
|
return None
|
|
@@ -125,14 +179,24 @@ def get_fireworks_account_id() -> Optional[str]:
|
|
|
125
179
|
Returns:
|
|
126
180
|
The Account ID if found, otherwise None.
|
|
127
181
|
"""
|
|
128
|
-
|
|
129
|
-
if
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
182
|
+
# If a profile is active, prefer profile file first, then env
|
|
183
|
+
if _is_profile_active():
|
|
184
|
+
account_id_from_file = _get_credential_from_config_file("account_id")
|
|
185
|
+
if account_id_from_file:
|
|
186
|
+
return account_id_from_file
|
|
187
|
+
account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
|
|
188
|
+
if account_id:
|
|
189
|
+
logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable (profile active but file missing).")
|
|
190
|
+
return account_id
|
|
191
|
+
else:
|
|
192
|
+
# Default behavior: env overrides file
|
|
193
|
+
account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
|
|
194
|
+
if account_id:
|
|
195
|
+
logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.")
|
|
196
|
+
return account_id
|
|
197
|
+
account_id_from_file = _get_credential_from_config_file("account_id")
|
|
198
|
+
if account_id_from_file:
|
|
199
|
+
return account_id_from_file
|
|
136
200
|
|
|
137
201
|
logger.debug("Fireworks Account ID not found in environment variables or auth.ini.")
|
|
138
202
|
return None
|
|
@@ -152,5 +216,5 @@ def get_fireworks_api_base() -> str:
|
|
|
152
216
|
if os.environ.get("FIREWORKS_API_BASE"):
|
|
153
217
|
logger.debug("Using FIREWORKS_API_BASE from environment variable.")
|
|
154
218
|
else:
|
|
155
|
-
logger.debug(
|
|
219
|
+
logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", api_base)
|
|
156
220
|
return api_base
|
|
@@ -3,32 +3,47 @@ Command-line interface for Eval Protocol.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
|
-
import asyncio
|
|
7
|
-
import json
|
|
8
6
|
import logging
|
|
9
7
|
import os
|
|
10
8
|
import sys
|
|
11
|
-
import traceback
|
|
12
|
-
import uuid
|
|
13
9
|
from pathlib import Path
|
|
10
|
+
from typing import Any, cast
|
|
14
11
|
|
|
15
12
|
logger = logging.getLogger(__name__)
|
|
16
13
|
|
|
17
14
|
|
|
18
|
-
from .cli_commands.agent_eval_cmd import agent_eval_command
|
|
19
15
|
from .cli_commands.common import setup_logging
|
|
20
|
-
|
|
21
|
-
from .
|
|
22
|
-
|
|
23
|
-
from .cli_commands
|
|
24
|
-
|
|
25
|
-
|
|
16
|
+
|
|
17
|
+
# Re-export deploy_command for backward compatibility with tests importing from eval_protocol.cli
|
|
18
|
+
try: # pragma: no cover - import-time alias for tests
|
|
19
|
+
from .cli_commands import deploy as _deploy_mod
|
|
20
|
+
|
|
21
|
+
deploy_command = _deploy_mod.deploy_command # type: ignore[attr-defined]
|
|
22
|
+
except Exception: # pragma: no cover
|
|
23
|
+
# If import fails in constrained environments, tests that import it will surface the issue
|
|
24
|
+
deploy_command = None # type: ignore[assignment]
|
|
25
|
+
|
|
26
|
+
# Re-export preview_command for backward compatibility with tests importing from eval_protocol.cli
|
|
27
|
+
try: # pragma: no cover - import-time alias for tests
|
|
28
|
+
from .cli_commands import preview as _preview_mod
|
|
29
|
+
|
|
30
|
+
preview_command = _preview_mod.preview_command # type: ignore[attr-defined]
|
|
31
|
+
except Exception: # pragma: no cover
|
|
32
|
+
preview_command = None # type: ignore[assignment]
|
|
26
33
|
|
|
27
34
|
|
|
28
35
|
def parse_args(args=None):
|
|
29
36
|
"""Parse command line arguments"""
|
|
30
37
|
parser = argparse.ArgumentParser(description="eval-protocol: Tools for evaluation and reward modeling")
|
|
31
38
|
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
|
|
39
|
+
parser.add_argument(
|
|
40
|
+
"--profile",
|
|
41
|
+
help="Fireworks profile to use (reads ~/.fireworks/profiles/<name>/auth.ini and settings.ini)",
|
|
42
|
+
)
|
|
43
|
+
parser.add_argument(
|
|
44
|
+
"--server",
|
|
45
|
+
help="Fireworks API server hostname or URL (e.g., dev.api.fireworks.ai or https://dev.api.fireworks.ai)",
|
|
46
|
+
)
|
|
32
47
|
|
|
33
48
|
subparsers = parser.add_subparsers(dest="command", help="Command to run")
|
|
34
49
|
|
|
@@ -356,12 +371,68 @@ def main():
|
|
|
356
371
|
os.environ["PYTHONPATH"] = f"{current_dir}{os.pathsep}{current_pythonpath}"
|
|
357
372
|
else:
|
|
358
373
|
os.environ["PYTHONPATH"] = current_dir
|
|
359
|
-
logger.debug(
|
|
374
|
+
logger.debug("Added current directory to PYTHONPATH: %s", current_dir)
|
|
360
375
|
|
|
361
376
|
# Also add to sys.path so it takes effect immediately for the current process
|
|
362
377
|
if current_dir not in sys.path:
|
|
363
378
|
sys.path.insert(0, current_dir)
|
|
364
379
|
|
|
380
|
+
# Pre-scan raw argv for global flags anywhere (before parsing or imports)
|
|
381
|
+
raw_argv = sys.argv[1:]
|
|
382
|
+
|
|
383
|
+
def _extract_flag_value(argv_list, flag_name):
|
|
384
|
+
# Supports --flag value and --flag=value
|
|
385
|
+
for i, tok in enumerate(argv_list):
|
|
386
|
+
if tok == flag_name:
|
|
387
|
+
if i + 1 < len(argv_list):
|
|
388
|
+
return argv_list[i + 1]
|
|
389
|
+
elif tok.startswith(flag_name + "="):
|
|
390
|
+
return tok.split("=", 1)[1]
|
|
391
|
+
return None
|
|
392
|
+
|
|
393
|
+
pre_profile = _extract_flag_value(raw_argv, "--profile")
|
|
394
|
+
pre_server = _extract_flag_value(raw_argv, "--server")
|
|
395
|
+
|
|
396
|
+
# Handle Fireworks profile selection early so downstream modules see the env
|
|
397
|
+
profile = pre_profile
|
|
398
|
+
if profile:
|
|
399
|
+
try:
|
|
400
|
+
os.environ["FIREWORKS_PROFILE"] = profile
|
|
401
|
+
# Mirror firectl behavior: ~/.fireworks[/profiles/<profile>]
|
|
402
|
+
base_dir = Path.home() / ".fireworks"
|
|
403
|
+
if profile:
|
|
404
|
+
base_dir = base_dir / "profiles" / profile
|
|
405
|
+
os.makedirs(str(base_dir), mode=0o700, exist_ok=True)
|
|
406
|
+
|
|
407
|
+
# Provide helpful env hints for consumers (optional)
|
|
408
|
+
os.environ["FIREWORKS_AUTH_FILE"] = str(base_dir / "auth.ini")
|
|
409
|
+
os.environ["FIREWORKS_SETTINGS_FILE"] = str(base_dir / "settings.ini")
|
|
410
|
+
logger.debug("Using Fireworks profile '%s' at %s", profile, base_dir)
|
|
411
|
+
except OSError as e:
|
|
412
|
+
logger.warning("Failed to initialize Fireworks profile '%s': %s", profile, e)
|
|
413
|
+
|
|
414
|
+
# Proactively resolve and export account_id from the active profile to avoid stale .env overrides
|
|
415
|
+
try:
|
|
416
|
+
from eval_protocol.auth import get_fireworks_account_id as _resolve_account_id
|
|
417
|
+
|
|
418
|
+
resolved_account = _resolve_account_id()
|
|
419
|
+
if resolved_account:
|
|
420
|
+
os.environ["FIREWORKS_ACCOUNT_ID"] = resolved_account
|
|
421
|
+
logger.debug("Resolved account_id from profile '%s': %s", profile, resolved_account)
|
|
422
|
+
except Exception as e: # noqa: B902
|
|
423
|
+
logger.debug("Unable to resolve account_id from profile '%s': %s", profile, e)
|
|
424
|
+
|
|
425
|
+
# Handle Fireworks server selection early
|
|
426
|
+
server = pre_server
|
|
427
|
+
if server:
|
|
428
|
+
# Normalize to full URL if just a hostname is supplied
|
|
429
|
+
normalized = server.strip()
|
|
430
|
+
if not normalized.startswith("http://") and not normalized.startswith("https://"):
|
|
431
|
+
normalized = f"https://{normalized}"
|
|
432
|
+
os.environ["FIREWORKS_API_BASE"] = normalized
|
|
433
|
+
logger.debug("Using Fireworks API base: %s", normalized)
|
|
434
|
+
|
|
435
|
+
# Now parse args normally (so help/commands work), after globals applied
|
|
365
436
|
# Store original sys.argv[0] because Hydra might manipulate it
|
|
366
437
|
# and we need it if we're not calling a Hydra app.
|
|
367
438
|
original_script_name = sys.argv[0]
|
|
@@ -370,16 +441,28 @@ def main():
|
|
|
370
441
|
setup_logging(args.verbose, getattr(args, "debug", False))
|
|
371
442
|
|
|
372
443
|
if args.command == "preview":
|
|
444
|
+
if preview_command is None:
|
|
445
|
+
raise ImportError("preview_command is unavailable")
|
|
373
446
|
return preview_command(args)
|
|
374
447
|
elif args.command == "deploy":
|
|
448
|
+
if deploy_command is None:
|
|
449
|
+
raise ImportError("deploy_command is unavailable")
|
|
375
450
|
return deploy_command(args)
|
|
376
451
|
elif args.command == "deploy-mcp":
|
|
452
|
+
from .cli_commands.deploy_mcp import deploy_mcp_command
|
|
453
|
+
|
|
377
454
|
return deploy_mcp_command(args)
|
|
378
455
|
elif args.command == "agent-eval":
|
|
456
|
+
from .cli_commands.agent_eval_cmd import agent_eval_command
|
|
457
|
+
|
|
379
458
|
return agent_eval_command(args)
|
|
380
459
|
elif args.command == "logs":
|
|
460
|
+
from .cli_commands.logs import logs_command
|
|
461
|
+
|
|
381
462
|
return logs_command(args)
|
|
382
463
|
elif args.command == "upload":
|
|
464
|
+
from .cli_commands.upload import upload_command
|
|
465
|
+
|
|
383
466
|
return upload_command(args)
|
|
384
467
|
elif args.command == "run":
|
|
385
468
|
# For the 'run' command, Hydra takes over argument parsing.
|
|
@@ -393,7 +476,7 @@ def main():
|
|
|
393
476
|
local_conf_dir = os.path.join(current_dir, "conf")
|
|
394
477
|
|
|
395
478
|
if not has_config_path and os.path.isdir(local_conf_dir):
|
|
396
|
-
logger.info(
|
|
479
|
+
logger.info("Auto-detected local conf directory: %s", local_conf_dir)
|
|
397
480
|
hydra_specific_args = [
|
|
398
481
|
"--config-path",
|
|
399
482
|
local_conf_dir,
|
|
@@ -410,18 +493,21 @@ def main():
|
|
|
410
493
|
path_val = hydra_specific_args[i]
|
|
411
494
|
abs_path = os.path.abspath(path_val)
|
|
412
495
|
logger.debug(
|
|
413
|
-
|
|
496
|
+
"Converting relative --config-path '%s' (space separated) to absolute '%s'",
|
|
497
|
+
path_val,
|
|
498
|
+
abs_path,
|
|
414
499
|
)
|
|
415
500
|
processed_hydra_args.append(abs_path)
|
|
416
501
|
else:
|
|
417
502
|
logger.error("--config-path specified without a value.")
|
|
418
|
-
pass
|
|
419
503
|
elif arg.startswith("--config-path="):
|
|
420
504
|
flag_part, path_val = arg.split("=", 1)
|
|
421
505
|
processed_hydra_args.append(flag_part)
|
|
422
506
|
abs_path = os.path.abspath(path_val)
|
|
423
507
|
logger.debug(
|
|
424
|
-
|
|
508
|
+
"Converting relative --config-path '%s' (equals separated) to absolute '%s'",
|
|
509
|
+
path_val,
|
|
510
|
+
abs_path,
|
|
425
511
|
)
|
|
426
512
|
processed_hydra_args.append(abs_path)
|
|
427
513
|
else:
|
|
@@ -429,14 +515,17 @@ def main():
|
|
|
429
515
|
i += 1
|
|
430
516
|
|
|
431
517
|
sys.argv = [sys.argv[0]] + processed_hydra_args
|
|
432
|
-
logger.info(
|
|
518
|
+
logger.info("SYSCALL_ARGV_FOR_HYDRA (after potential abspath conversion): %s", sys.argv)
|
|
433
519
|
|
|
434
520
|
try:
|
|
435
|
-
hydra_cli_entry_point
|
|
521
|
+
from .cli_commands.run_eval_cmd import hydra_cli_entry_point
|
|
522
|
+
|
|
523
|
+
hydra_entry = cast(Any, hydra_cli_entry_point)
|
|
524
|
+
hydra_entry() # type: ignore # pylint: disable=no-value-for-parameter
|
|
436
525
|
return 0
|
|
437
|
-
except Exception as e:
|
|
526
|
+
except Exception as e: # pylint: disable=broad-except
|
|
438
527
|
error_msg = str(e)
|
|
439
|
-
logger.error(
|
|
528
|
+
logger.error("Evaluation failed: %s", e)
|
|
440
529
|
|
|
441
530
|
# Provide helpful suggestions for common Hydra/config errors
|
|
442
531
|
if "Cannot find primary config" in error_msg:
|