eval-protocol 0.3.9.dev1__tar.gz → 0.3.10.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.3.9.dev1/eval_protocol.egg-info → eval_protocol-0.3.10.dev2}/PKG-INFO +2 -2
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/fireworks_tracing.py +9 -2
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/auth.py +72 -2
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli.py +8 -6
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/create_rft.py +66 -100
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/local_test.py +7 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/upload.py +3 -3
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/evaluation.py +53 -32
- eval_protocol-0.3.10.dev2/eval_protocol/fireworks_client.py +132 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/platform_api.py +17 -27
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test.py +2 -2
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/remote_rollout_processor.py +16 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/tracing_utils.py +18 -3
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2/eval_protocol.egg-info}/PKG-INFO +2 -2
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/SOURCES.txt +5 -3
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/requires.txt +1 -1
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/pyproject.toml +1 -1
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_create_rft.py +17 -61
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_ep_upload_e2e.py +51 -140
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_evaluation.py +22 -7
- eval_protocol-0.3.10.dev2/tests/test_fireworks_client.py +143 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_upload_entrypoint.py +10 -12
- eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-10cZ11iB.js +137 -0
- eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-10cZ11iB.js.map +1 -0
- eval_protocol-0.3.10.dev2/vite-app/dist/assets/index-DOD73Wyg.css +1 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/index.html +2 -2
- eval_protocol-0.3.9.dev1/vite-app/dist/assets/index-CuQbfdPD.js +0 -46
- eval_protocol-0.3.9.dev1/vite-app/dist/assets/index-CuQbfdPD.js.map +0 -1
- eval_protocol-0.3.9.dev1/vite-app/dist/assets/index-iZp_HgyW.css +0 -1
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/LICENSE +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/README.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/utils/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/dataframe.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/export_docs.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/exceptions.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/fireworks_rft.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/openai_rft.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/tinker_cookbook.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/models.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/buffer.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/priority_scheduler.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/gepa_trainer.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/gepa_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/trainer.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/training/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/setup.cfg +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/setup.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_auth.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cli_local_test.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_exception_config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_exceptions.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_format.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_human_id.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_length.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_litellm_policy_provider_fields.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_math.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_message_field_filtering.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_minimal.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_models.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openai_rft_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_packaging.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_priority_scheduler.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_readiness.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_repetition.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_rollout_logprobs.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_sqlite_hardening.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_status_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_training_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/versioneer.py +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.10.dev2
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -29,7 +29,7 @@ Requires-Dist: pytest>=6.0.0
|
|
|
29
29
|
Requires-Dist: pytest-asyncio>=0.21.0
|
|
30
30
|
Requires-Dist: peewee>=3.18.2
|
|
31
31
|
Requires-Dist: backoff>=2.2.0
|
|
32
|
-
Requires-Dist: fireworks-ai==1.0.
|
|
32
|
+
Requires-Dist: fireworks-ai==1.0.0a22
|
|
33
33
|
Requires-Dist: questionary>=2.0.0
|
|
34
34
|
Requires-Dist: toml>=0.10.0
|
|
35
35
|
Requires-Dist: loguru>=0.6.0
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-01-
|
|
11
|
+
"date": "2026-01-13T16:25:00-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.3.
|
|
14
|
+
"full-revisionid": "66f191a09db5364b9cd9bb21230e1f48e50be724",
|
|
15
|
+
"version": "0.3.10.dev.2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
{eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/adapters/fireworks_tracing.py
RENAMED
|
@@ -253,6 +253,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
253
253
|
project_id: Optional[str] = None,
|
|
254
254
|
base_url: str = "https://tracing.fireworks.ai",
|
|
255
255
|
timeout: int = 300,
|
|
256
|
+
api_key: Optional[str] = None,
|
|
256
257
|
):
|
|
257
258
|
"""Initialize the Fireworks Tracing adapter.
|
|
258
259
|
|
|
@@ -260,10 +261,16 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
260
261
|
project_id: Optional project ID. If not provided, uses the default project configured on the server.
|
|
261
262
|
base_url: The base URL of the tracing proxy (default: https://tracing.fireworks.ai)
|
|
262
263
|
timeout: Request timeout in seconds (default: 300)
|
|
264
|
+
api_key: Optional API key. If not provided, falls back to FIREWORKS_API_KEY environment variable.
|
|
263
265
|
"""
|
|
264
266
|
self.project_id = project_id
|
|
265
267
|
self.base_url = base_url.rstrip("/")
|
|
266
268
|
self.timeout = timeout
|
|
269
|
+
self._api_key = api_key
|
|
270
|
+
|
|
271
|
+
def _get_api_key(self) -> Optional[str]:
|
|
272
|
+
"""Get the API key, preferring instance-level key over environment variable."""
|
|
273
|
+
return self._api_key or os.environ.get("FIREWORKS_API_KEY")
|
|
267
274
|
|
|
268
275
|
def search_logs(self, tags: List[str], limit: int = 100, hours_back: int = 24) -> List[Dict[str, Any]]:
|
|
269
276
|
"""Fetch logs from Fireworks tracing gateway /logs endpoint.
|
|
@@ -276,7 +283,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
276
283
|
from ..common_utils import get_user_agent
|
|
277
284
|
|
|
278
285
|
headers = {
|
|
279
|
-
"Authorization": f"Bearer {
|
|
286
|
+
"Authorization": f"Bearer {self._get_api_key()}",
|
|
280
287
|
"User-Agent": get_user_agent(),
|
|
281
288
|
}
|
|
282
289
|
params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
|
|
@@ -407,7 +414,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
407
414
|
from ..common_utils import get_user_agent
|
|
408
415
|
|
|
409
416
|
headers = {
|
|
410
|
-
"Authorization": f"Bearer {
|
|
417
|
+
"Authorization": f"Bearer {self._get_api_key()}",
|
|
411
418
|
"User-Agent": get_user_agent(),
|
|
412
419
|
}
|
|
413
420
|
|
|
@@ -1,12 +1,75 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
-
from typing import Optional
|
|
3
|
+
from typing import Dict, Optional
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
|
+
from dotenv import dotenv_values, find_dotenv, load_dotenv
|
|
6
7
|
|
|
7
8
|
logger = logging.getLogger(__name__)
|
|
8
9
|
|
|
9
10
|
|
|
11
|
+
def find_dotenv_path(search_path: Optional[str] = None) -> Optional[str]:
|
|
12
|
+
"""
|
|
13
|
+
Find the .env file path, searching .env.dev first, then .env.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
search_path: Directory to search from. If None, uses current working directory.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
Path to the .env file if found, otherwise None.
|
|
20
|
+
"""
|
|
21
|
+
# If a specific search path is provided, look there first
|
|
22
|
+
if search_path:
|
|
23
|
+
env_dev_path = os.path.join(search_path, ".env.dev")
|
|
24
|
+
if os.path.isfile(env_dev_path):
|
|
25
|
+
return env_dev_path
|
|
26
|
+
env_path = os.path.join(search_path, ".env")
|
|
27
|
+
if os.path.isfile(env_path):
|
|
28
|
+
return env_path
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
# Otherwise use find_dotenv to search up the directory tree
|
|
32
|
+
env_dev_path = find_dotenv(filename=".env.dev", raise_error_if_not_found=False, usecwd=True)
|
|
33
|
+
if env_dev_path:
|
|
34
|
+
return env_dev_path
|
|
35
|
+
env_path = find_dotenv(filename=".env", raise_error_if_not_found=False, usecwd=True)
|
|
36
|
+
if env_path:
|
|
37
|
+
return env_path
|
|
38
|
+
return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_dotenv_values(search_path: Optional[str] = None) -> Dict[str, Optional[str]]:
|
|
42
|
+
"""
|
|
43
|
+
Get all key-value pairs from the .env file.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
search_path: Directory to search from. If None, uses current working directory.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
Dictionary of environment variable names to values.
|
|
50
|
+
"""
|
|
51
|
+
dotenv_path = find_dotenv_path(search_path)
|
|
52
|
+
if dotenv_path:
|
|
53
|
+
return dotenv_values(dotenv_path)
|
|
54
|
+
return {}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# --- Load .env files ---
|
|
58
|
+
# Attempt to load .env.dev first, then .env as a fallback.
|
|
59
|
+
# This happens when the module is imported.
|
|
60
|
+
# We use override=False (default) so that existing environment variables
|
|
61
|
+
# (e.g., set in the shell) are NOT overridden by .env files.
|
|
62
|
+
_DOTENV_PATH = find_dotenv_path()
|
|
63
|
+
if _DOTENV_PATH:
|
|
64
|
+
load_dotenv(dotenv_path=_DOTENV_PATH, override=False)
|
|
65
|
+
logger.debug(f"eval_protocol.auth: Loaded environment variables from: {_DOTENV_PATH}")
|
|
66
|
+
else:
|
|
67
|
+
logger.debug(
|
|
68
|
+
"eval_protocol.auth: No .env.dev or .env file found. Relying on shell/existing environment variables."
|
|
69
|
+
)
|
|
70
|
+
# --- End .env loading ---
|
|
71
|
+
|
|
72
|
+
|
|
10
73
|
def get_fireworks_api_key() -> Optional[str]:
|
|
11
74
|
"""
|
|
12
75
|
Retrieves the Fireworks API key.
|
|
@@ -73,6 +136,8 @@ def verify_api_key_and_get_account_id(
|
|
|
73
136
|
Args:
|
|
74
137
|
api_key: Optional explicit API key. When None, resolves via get_fireworks_api_key().
|
|
75
138
|
api_base: Optional explicit API base. When None, resolves via get_fireworks_api_base().
|
|
139
|
+
If api_base is api.fireworks.ai, it is used directly. Otherwise, defaults to
|
|
140
|
+
dev.api.fireworks.ai for the verification call.
|
|
76
141
|
|
|
77
142
|
Returns:
|
|
78
143
|
The resolved account id if verification succeeds and the header is present; otherwise None.
|
|
@@ -81,7 +146,12 @@ def verify_api_key_and_get_account_id(
|
|
|
81
146
|
resolved_key = api_key or get_fireworks_api_key()
|
|
82
147
|
if not resolved_key:
|
|
83
148
|
return None
|
|
84
|
-
|
|
149
|
+
provided_base = api_base or get_fireworks_api_base()
|
|
150
|
+
# Use api.fireworks.ai if explicitly provided, otherwise fall back to dev
|
|
151
|
+
if "api.fireworks.ai" in provided_base:
|
|
152
|
+
resolved_base = provided_base
|
|
153
|
+
else:
|
|
154
|
+
resolved_base = "https://dev.api.fireworks.ai"
|
|
85
155
|
|
|
86
156
|
from .common_utils import get_user_agent
|
|
87
157
|
|
|
@@ -81,13 +81,12 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
|
|
|
81
81
|
"--env-file",
|
|
82
82
|
help="Path to .env file containing secrets to upload (default: .env in current directory)",
|
|
83
83
|
)
|
|
84
|
-
upload_parser.add_argument(
|
|
85
|
-
"--force",
|
|
86
|
-
action="store_true",
|
|
87
|
-
help="Overwrite existing evaluator with the same ID",
|
|
88
|
-
)
|
|
89
84
|
|
|
90
85
|
# Auto-generate flags from SDK Fireworks().evaluators.create() signature
|
|
86
|
+
# Note: We use Fireworks() directly here instead of create_fireworks_client()
|
|
87
|
+
# because we only need the method signature for introspection, not a fully
|
|
88
|
+
# authenticated client. create_fireworks_client() would trigger an HTTP request
|
|
89
|
+
# to verify the API key, causing delays even for --help invocations.
|
|
91
90
|
create_evaluator_fn = Fireworks().evaluators.create
|
|
92
91
|
|
|
93
92
|
upload_skip_fields = {
|
|
@@ -137,7 +136,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
|
|
|
137
136
|
|
|
138
137
|
rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
|
|
139
138
|
rft_parser.add_argument("--dry-run", action="store_true", help="Print planned SDK call without sending")
|
|
140
|
-
rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
|
|
141
139
|
rft_parser.add_argument("--skip-validation", action="store_true", help="Skip local dataset/evaluator validation")
|
|
142
140
|
rft_parser.add_argument(
|
|
143
141
|
"--ignore-docker",
|
|
@@ -198,6 +196,10 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
|
|
|
198
196
|
"loss_config.method": "RL loss method for underlying trainers. One of {grpo,dapo}.",
|
|
199
197
|
}
|
|
200
198
|
|
|
199
|
+
# Note: We use Fireworks() directly here instead of create_fireworks_client()
|
|
200
|
+
# because we only need the method signature for introspection, not a fully
|
|
201
|
+
# authenticated client. create_fireworks_client() would trigger an HTTP request
|
|
202
|
+
# to verify the API key, causing delays even for --help invocations.
|
|
201
203
|
create_rft_job_fn = Fireworks().reinforcement_fine_tuning_jobs.create
|
|
202
204
|
|
|
203
205
|
add_args_from_callable_signature(
|
{eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/create_rft.py
RENAMED
|
@@ -7,19 +7,18 @@ import sys
|
|
|
7
7
|
import time
|
|
8
8
|
from typing import Any, Callable, Dict, Optional
|
|
9
9
|
import inspect
|
|
10
|
-
import requests
|
|
11
10
|
import tempfile
|
|
12
11
|
from pydantic import ValidationError
|
|
13
12
|
|
|
14
13
|
from ..auth import get_fireworks_api_base, get_fireworks_api_key
|
|
15
|
-
from ..
|
|
14
|
+
from ..fireworks_client import create_fireworks_client
|
|
15
|
+
from ..common_utils import load_jsonl
|
|
16
16
|
from ..fireworks_rft import (
|
|
17
17
|
create_dataset_from_jsonl,
|
|
18
18
|
detect_dataset_builder,
|
|
19
19
|
materialize_dataset_via_builder,
|
|
20
20
|
)
|
|
21
21
|
from ..models import EvaluationRow
|
|
22
|
-
from .upload import upload_command
|
|
23
22
|
from .utils import (
|
|
24
23
|
_build_entry_point,
|
|
25
24
|
_build_trimmed_dataset_id,
|
|
@@ -35,8 +34,6 @@ from .utils import (
|
|
|
35
34
|
)
|
|
36
35
|
from .local_test import run_evaluator_test
|
|
37
36
|
|
|
38
|
-
from fireworks import Fireworks
|
|
39
|
-
|
|
40
37
|
|
|
41
38
|
def _extract_dataset_adapter(
|
|
42
39
|
test_file_path: str, test_func_name: str
|
|
@@ -223,64 +220,68 @@ def _extract_jsonl_from_input_dataset(test_file_path: str, test_func_name: str)
|
|
|
223
220
|
return None
|
|
224
221
|
|
|
225
222
|
|
|
226
|
-
def
|
|
227
|
-
|
|
223
|
+
def _poll_evaluator_version_status(
|
|
224
|
+
evaluator_id: str,
|
|
225
|
+
version_id: str,
|
|
226
|
+
api_key: str,
|
|
227
|
+
api_base: str,
|
|
228
|
+
timeout_minutes: int = 10,
|
|
228
229
|
) -> bool:
|
|
229
230
|
"""
|
|
230
|
-
Poll evaluator status until it becomes ACTIVE or times out.
|
|
231
|
+
Poll a specific evaluator version status until it becomes ACTIVE or times out.
|
|
232
|
+
|
|
233
|
+
Uses the Fireworks SDK to get the specified version of the evaluator and checks
|
|
234
|
+
its build state.
|
|
231
235
|
|
|
232
236
|
Args:
|
|
233
|
-
|
|
237
|
+
evaluator_id: The evaluator ID (not full resource name)
|
|
238
|
+
version_id: The specific version ID to poll
|
|
234
239
|
api_key: Fireworks API key
|
|
235
240
|
api_base: Fireworks API base URL
|
|
236
241
|
timeout_minutes: Maximum time to wait in minutes
|
|
237
242
|
|
|
238
243
|
Returns:
|
|
239
|
-
True if evaluator becomes ACTIVE, False if timeout or BUILD_FAILED
|
|
244
|
+
True if evaluator version becomes ACTIVE, False if timeout or BUILD_FAILED
|
|
240
245
|
"""
|
|
241
|
-
headers = {
|
|
242
|
-
"Authorization": f"Bearer {api_key}",
|
|
243
|
-
"Content-Type": "application/json",
|
|
244
|
-
"User-Agent": get_user_agent(),
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
check_url = f"{api_base}/v1/{evaluator_resource_name}"
|
|
248
246
|
timeout_seconds = timeout_minutes * 60
|
|
249
247
|
poll_interval = 10 # seconds
|
|
250
248
|
start_time = time.time()
|
|
251
249
|
|
|
252
|
-
print(
|
|
250
|
+
print(
|
|
251
|
+
f"Polling evaluator version '{version_id}' status (timeout: {timeout_minutes}m, interval: {poll_interval}s)..."
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
client = create_fireworks_client(api_key=api_key, base_url=api_base)
|
|
253
255
|
|
|
254
256
|
while time.time() - start_time < timeout_seconds:
|
|
255
257
|
try:
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
status = evaluator_data.get("status", "")
|
|
258
|
+
version = client.evaluator_versions.get(version_id, evaluator_id=evaluator_id)
|
|
259
|
+
state = version.state or "STATE_UNSPECIFIED"
|
|
260
|
+
status_msg = ""
|
|
261
|
+
if version.status and version.status.message:
|
|
262
|
+
status_msg = version.status.message
|
|
262
263
|
|
|
263
264
|
if state == "ACTIVE":
|
|
264
|
-
print("✅ Evaluator is ACTIVE and ready!")
|
|
265
|
+
print("✅ Evaluator version is ACTIVE and ready!")
|
|
265
266
|
return True
|
|
266
267
|
elif state == "BUILD_FAILED":
|
|
267
|
-
print(f"❌ Evaluator build failed. Status: {
|
|
268
|
+
print(f"❌ Evaluator version build failed. Status: {status_msg}")
|
|
268
269
|
return False
|
|
269
270
|
elif state == "BUILDING":
|
|
270
271
|
elapsed_minutes = (time.time() - start_time) / 60
|
|
271
|
-
print(f"⏳ Evaluator is still building... ({elapsed_minutes:.1f}m elapsed)")
|
|
272
|
+
print(f"⏳ Evaluator version is still building... ({elapsed_minutes:.1f}m elapsed)")
|
|
272
273
|
else:
|
|
273
|
-
print(f"⏳ Evaluator state: {state}, status: {
|
|
274
|
+
print(f"⏳ Evaluator version state: {state}, status: {status_msg}")
|
|
274
275
|
|
|
275
|
-
except
|
|
276
|
-
print(f"Warning: Failed to check evaluator status: {e}")
|
|
276
|
+
except Exception as e:
|
|
277
|
+
print(f"Warning: Failed to check evaluator version status: {e}")
|
|
277
278
|
|
|
278
279
|
# Wait before next poll
|
|
279
280
|
time.sleep(poll_interval)
|
|
280
281
|
|
|
281
282
|
# Timeout reached
|
|
282
283
|
elapsed_minutes = (time.time() - start_time) / 60
|
|
283
|
-
print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator is not yet ACTIVE")
|
|
284
|
+
print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator version is not yet ACTIVE")
|
|
284
285
|
return False
|
|
285
286
|
|
|
286
287
|
|
|
@@ -565,42 +566,16 @@ def _upload_dataset(
|
|
|
565
566
|
def _upload_and_ensure_evaluator(
|
|
566
567
|
project_root: str,
|
|
567
568
|
evaluator_id: str,
|
|
568
|
-
evaluator_resource_name: str,
|
|
569
569
|
api_key: str,
|
|
570
570
|
api_base: str,
|
|
571
|
-
force: bool,
|
|
572
571
|
) -> bool:
|
|
573
|
-
"""
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
"Content-Type": "application/json",
|
|
580
|
-
"User-Agent": get_user_agent(),
|
|
581
|
-
}
|
|
582
|
-
resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
|
|
583
|
-
if resp.ok:
|
|
584
|
-
state = resp.json().get("state", "STATE_UNSPECIFIED")
|
|
585
|
-
print(f"✓ Evaluator exists (state: {state}). Skipping upload (use --force to overwrite).")
|
|
586
|
-
# Poll for ACTIVE before proceeding
|
|
587
|
-
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
|
|
588
|
-
if not _poll_evaluator_status(
|
|
589
|
-
evaluator_resource_name=evaluator_resource_name,
|
|
590
|
-
api_key=api_key,
|
|
591
|
-
api_base=api_base,
|
|
592
|
-
timeout_minutes=10,
|
|
593
|
-
):
|
|
594
|
-
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
|
|
595
|
-
print("\n❌ Evaluator is not ready within the timeout period.")
|
|
596
|
-
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
597
|
-
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
598
|
-
return False
|
|
599
|
-
return True
|
|
600
|
-
except requests.exceptions.RequestException:
|
|
601
|
-
pass
|
|
572
|
+
"""Upload evaluator and ensure its version becomes ACTIVE.
|
|
573
|
+
|
|
574
|
+
Creates/updates the evaluator and uploads the code, then polls the specific
|
|
575
|
+
version until it becomes ACTIVE.
|
|
576
|
+
"""
|
|
577
|
+
from eval_protocol.evaluation import create_evaluation
|
|
602
578
|
|
|
603
|
-
# Ensure evaluator exists by invoking the upload flow programmatically
|
|
604
579
|
try:
|
|
605
580
|
tests = _discover_tests(project_root)
|
|
606
581
|
selected_entry: Optional[str] = None
|
|
@@ -617,43 +592,37 @@ def _upload_and_ensure_evaluator(
|
|
|
617
592
|
)
|
|
618
593
|
return False
|
|
619
594
|
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
force=force, # Pass through the --force flag
|
|
627
|
-
yes=True,
|
|
628
|
-
env_file=None, # Add the new env_file parameter
|
|
595
|
+
print(f"\nUploading evaluator '{evaluator_id}'...")
|
|
596
|
+
result, version_id = create_evaluation(
|
|
597
|
+
evaluator_id=evaluator_id,
|
|
598
|
+
display_name=evaluator_id,
|
|
599
|
+
description=f"Evaluator for {evaluator_id}",
|
|
600
|
+
entry_point=selected_entry,
|
|
629
601
|
)
|
|
630
602
|
|
|
631
|
-
if
|
|
632
|
-
print(
|
|
603
|
+
if not version_id:
|
|
604
|
+
print("Warning: Evaluator created but version upload failed.")
|
|
605
|
+
return False
|
|
633
606
|
|
|
634
|
-
|
|
635
|
-
if rc == 0:
|
|
636
|
-
print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
|
|
607
|
+
print(f"✓ Uploaded evaluator: {evaluator_id} (version: {version_id})")
|
|
637
608
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
609
|
+
# Poll for the specific evaluator version status
|
|
610
|
+
print(f"Waiting for evaluator '{evaluator_id}' version '{version_id}' to become ACTIVE...")
|
|
611
|
+
is_active = _poll_evaluator_version_status(
|
|
612
|
+
evaluator_id=evaluator_id,
|
|
613
|
+
version_id=version_id,
|
|
614
|
+
api_key=api_key,
|
|
615
|
+
api_base=api_base,
|
|
616
|
+
timeout_minutes=10,
|
|
617
|
+
)
|
|
646
618
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
return False
|
|
653
|
-
return True
|
|
654
|
-
else:
|
|
655
|
-
print("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation.")
|
|
619
|
+
if not is_active:
|
|
620
|
+
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
|
|
621
|
+
print("\n❌ Evaluator version is not ready within the timeout period.")
|
|
622
|
+
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
623
|
+
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
656
624
|
return False
|
|
625
|
+
return True
|
|
657
626
|
except Exception as e:
|
|
658
627
|
print(f"Warning: Failed to upload evaluator automatically: {e}")
|
|
659
628
|
return False
|
|
@@ -672,7 +641,7 @@ def _create_rft_job(
|
|
|
672
641
|
) -> int:
|
|
673
642
|
"""Build and submit the RFT job request (via Fireworks SDK)."""
|
|
674
643
|
|
|
675
|
-
signature = inspect.signature(
|
|
644
|
+
signature = inspect.signature(create_fireworks_client().reinforcement_fine_tuning_jobs.create)
|
|
676
645
|
|
|
677
646
|
# Build top-level SDK kwargs
|
|
678
647
|
sdk_kwargs: Dict[str, Any] = {
|
|
@@ -711,7 +680,7 @@ def _create_rft_job(
|
|
|
711
680
|
return 0
|
|
712
681
|
|
|
713
682
|
try:
|
|
714
|
-
fw: Fireworks =
|
|
683
|
+
fw: Fireworks = create_fireworks_client(api_key=api_key, base_url=api_base)
|
|
715
684
|
job: ReinforcementFineTuningJob = fw.reinforcement_fine_tuning_jobs.create(account_id=account_id, **sdk_kwargs)
|
|
716
685
|
job_name = job.name
|
|
717
686
|
print(f"\n✅ Created Reinforcement Fine-tuning Job: {job_name}")
|
|
@@ -739,7 +708,6 @@ def create_rft_command(args) -> int:
|
|
|
739
708
|
evaluator_arg: Optional[str] = getattr(args, "evaluator", None)
|
|
740
709
|
non_interactive: bool = bool(getattr(args, "yes", False))
|
|
741
710
|
dry_run: bool = bool(getattr(args, "dry_run", False))
|
|
742
|
-
force: bool = bool(getattr(args, "force", False))
|
|
743
711
|
skip_validation: bool = bool(getattr(args, "skip_validation", False))
|
|
744
712
|
ignore_docker: bool = bool(getattr(args, "ignore_docker", False))
|
|
745
713
|
docker_build_extra: str = getattr(args, "docker_build_extra", "") or ""
|
|
@@ -810,14 +778,12 @@ def create_rft_command(args) -> int:
|
|
|
810
778
|
if not dataset_id or not dataset_resource:
|
|
811
779
|
return 1
|
|
812
780
|
|
|
813
|
-
# 5) Ensure evaluator exists and is ACTIVE (upload + poll if needed)
|
|
781
|
+
# 5) Ensure evaluator exists and its latest version is ACTIVE (upload + poll if needed)
|
|
814
782
|
if not _upload_and_ensure_evaluator(
|
|
815
783
|
project_root=project_root,
|
|
816
784
|
evaluator_id=evaluator_id,
|
|
817
|
-
evaluator_resource_name=evaluator_resource_name,
|
|
818
785
|
api_key=api_key,
|
|
819
786
|
api_base=api_base,
|
|
820
|
-
force=force,
|
|
821
787
|
):
|
|
822
788
|
return 1
|
|
823
789
|
|
{eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/local_test.py
RENAMED
|
@@ -5,6 +5,7 @@ import subprocess
|
|
|
5
5
|
import sys
|
|
6
6
|
from typing import List
|
|
7
7
|
|
|
8
|
+
from ..auth import get_dotenv_values
|
|
8
9
|
from .utils import _build_entry_point, _discover_and_select_tests
|
|
9
10
|
|
|
10
11
|
|
|
@@ -71,6 +72,12 @@ def _run_pytest_in_docker(
|
|
|
71
72
|
workdir,
|
|
72
73
|
]
|
|
73
74
|
|
|
75
|
+
# Forward environment variables from .env file to the container
|
|
76
|
+
dotenv_vars = get_dotenv_values(project_root)
|
|
77
|
+
for key, value in dotenv_vars.items():
|
|
78
|
+
if value is not None:
|
|
79
|
+
cmd += ["-e", f"{key}={value}"]
|
|
80
|
+
|
|
74
81
|
# If EP_SUMMARY_JSON is set on the host, mirror it into the container so that
|
|
75
82
|
# pytest evaluation tests can write summary artifacts that are visible to the
|
|
76
83
|
# host. We map paths under the host logs directory (~/.eval_protocol) into the
|
|
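{eval_protocol-0.3.9.dev1 → eval_protocol-0.3.10.dev2}/eval_protocol/cli_commands/upload.py
RENAMED
|
@@ -289,7 +289,6 @@ def upload_command(args: argparse.Namespace) -> int: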
|
|
|
289
289
|
base_id = getattr(args, "id", None)
|
|
290
290
|
display_name = getattr(args, "display_name", None)
|
|
291
291
|
description = getattr(args, "description", None)
|
|
292
|
-
force = bool(getattr(args, "force", False))
|
|
293
292
|
env_file = getattr(args, "env_file", None)
|
|
294
293
|
|
|
295
294
|
# Load secrets from .env file and ensure they're available on Fireworks
|
|
@@ -378,17 +377,18 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
378
377
|
|
|
379
378
|
print(f"\nUploading evaluator '{evaluator_id}' for {qualname.split('.')[-1]}...")
|
|
380
379
|
try:
|
|
381
|
-
result = create_evaluation(
|
|
380
|
+
result, version_id = create_evaluation(
|
|
382
381
|
evaluator_id=evaluator_id,
|
|
383
382
|
display_name=display_name or evaluator_id,
|
|
384
383
|
description=description or f"Evaluator for {qualname}",
|
|
385
|
-
force=force,
|
|
386
384
|
entry_point=entry_point,
|
|
387
385
|
)
|
|
388
386
|
name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id
|
|
389
387
|
|
|
390
388
|
# Print success message with Fireworks dashboard link
|
|
391
389
|
print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
|
|
390
|
+
if version_id:
|
|
391
|
+
print(f" Version: {version_id}")
|
|
392
392
|
print("📊 View in Fireworks Dashboard:")
|
|
393
393
|
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
|
|
394
394
|
print(f" {dashboard_url}\n")
|