eval-protocol 0.2.64.dev1__tar.gz → 0.2.84.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.64.dev1/eval_protocol.egg-info → eval_protocol-0.2.84.dev1}/PKG-INFO +1 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/fireworks_tracing.py +12 -2
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/auth.py +76 -4
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_frozen_lake.py +1 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli.py +78 -0
- eval_protocol-0.2.84.dev1/eval_protocol/cli_commands/create_rft.py +707 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/upload.py +109 -64
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/common_utils.py +17 -0
- eval_protocol-0.2.84.dev1/eval_protocol/data_loader/__init__.py +5 -0
- eval_protocol-0.2.84.dev1/eval_protocol/data_loader/jsonl_data_loader.py +42 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/evaluation.py +47 -2
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/event_bus/__init__.py +15 -2
- eval_protocol-0.2.84.dev1/eval_protocol/exceptions.py +177 -0
- eval_protocol-0.2.84.dev1/eval_protocol/fireworks_rft.py +221 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/generation/clients.py +3 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/mcp_multi_client.py +43 -2
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/models.py +198 -5
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/platform_api.py +18 -10
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_agent_rollout_processor.py +4 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +25 -27
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_single_turn_rollout_process.py +20 -2
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/evaluation_test.py +26 -13
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/evaluation_test_postprocess.py +2 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/evaluation_test_utils.py +1 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/exception_config.py +14 -1
- eval_protocol-0.2.84.dev1/eval_protocol/pytest/execution.py +111 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/handle_persist_flow.py +29 -22
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/plugin.py +15 -5
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/remote_rollout_processor.py +13 -13
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/tracing_utils.py +10 -7
- eval_protocol-0.2.84.dev1/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +223 -0
- eval_protocol-0.2.84.dev1/eval_protocol/quickstart/svg_agent/evaluator/utils.py +523 -0
- eval_protocol-0.2.84.dev1/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +202 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol.egg-info/SOURCES.txt +12 -3
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_auth.py +10 -5
- eval_protocol-0.2.84.dev1/tests/test_cli_create_rft_infer.py +710 -0
- eval_protocol-0.2.84.dev1/tests/test_exceptions.py +371 -0
- eval_protocol-0.2.84.dev1/tests/test_message_field_filtering.py +64 -0
- eval_protocol-0.2.84.dev1/vite-app/dist/assets/index-BGlGI2LH.css +1 -0
- eval_protocol-0.2.84.dev1/vite-app/dist/assets/index-CnGlFAnP.js +137 -0
- eval_protocol-0.2.84.dev1/vite-app/dist/assets/index-CnGlFAnP.js.map +1 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vite-app/dist/index.html +2 -2
- eval_protocol-0.2.64.dev1/eval_protocol/data_loader/__init__.py +0 -4
- eval_protocol-0.2.64.dev1/eval_protocol/pytest/execution.py +0 -43
- eval_protocol-0.2.64.dev1/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -162
- eval_protocol-0.2.64.dev1/vite-app/dist/assets/index-BnDJont9.css +0 -1
- eval_protocol-0.2.64.dev1/vite-app/dist/assets/index-Cu9t0G5i.js +0 -137
- eval_protocol-0.2.64.dev1/vite-app/dist/assets/index-Cu9t0G5i.js.map +0 -1
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/LICENSE +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/README.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/development/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/pyproject.toml +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/setup.cfg +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/setup.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_config.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_format.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_length.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_math.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_models.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/versioneer.py +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.84.dev1
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-
|
|
11
|
+
"date": "2025-11-10T16:26:12-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.
|
|
14
|
+
"full-revisionid": "66542cf6410b379a35d2aec3de041fb37e18b0e2",
|
|
15
|
+
"version": "0.2.84.dev.1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
{eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/adapters/fireworks_tracing.py
RENAMED
|
@@ -273,7 +273,12 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
273
273
|
if not tags:
|
|
274
274
|
raise ValueError("At least one tag is required to fetch logs")
|
|
275
275
|
|
|
276
|
-
|
|
276
|
+
from ..common_utils import get_user_agent
|
|
277
|
+
|
|
278
|
+
headers = {
|
|
279
|
+
"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
|
|
280
|
+
"User-Agent": get_user_agent(),
|
|
281
|
+
}
|
|
277
282
|
params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
|
|
278
283
|
|
|
279
284
|
# Try /logs first, fall back to /v1/logs if not found
|
|
@@ -398,7 +403,12 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
398
403
|
else:
|
|
399
404
|
url = f"{self.base_url}/v1/traces/pointwise"
|
|
400
405
|
|
|
401
|
-
|
|
406
|
+
from ..common_utils import get_user_agent
|
|
407
|
+
|
|
408
|
+
headers = {
|
|
409
|
+
"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
|
|
410
|
+
"User-Agent": get_user_agent(),
|
|
411
|
+
}
|
|
402
412
|
|
|
403
413
|
result = None
|
|
404
414
|
try:
|
|
@@ -136,6 +136,56 @@ def _get_credential_from_config_file(key_name: str) -> Optional[str]:
|
|
|
136
136
|
return None
|
|
137
137
|
|
|
138
138
|
|
|
139
|
+
def _get_credentials_from_config_file() -> Dict[str, Optional[str]]:
|
|
140
|
+
"""
|
|
141
|
+
Retrieve both api_key and account_id from auth.ini with a single read/parse.
|
|
142
|
+
Tries simple parsing first for both keys, then falls back to configparser for any missing ones.
|
|
143
|
+
Returns a dict with up to two keys: 'api_key' and 'account_id'.
|
|
144
|
+
"""
|
|
145
|
+
results: Dict[str, Optional[str]] = {}
|
|
146
|
+
auth_ini_path = _get_auth_ini_file()
|
|
147
|
+
if not auth_ini_path.exists():
|
|
148
|
+
return results
|
|
149
|
+
|
|
150
|
+
# 1) Simple key=value parsing
|
|
151
|
+
try:
|
|
152
|
+
simple_creds = _parse_simple_auth_file(auth_ini_path)
|
|
153
|
+
if "api_key" in simple_creds and simple_creds["api_key"]:
|
|
154
|
+
results["api_key"] = simple_creds["api_key"]
|
|
155
|
+
if "account_id" in simple_creds and simple_creds["account_id"]:
|
|
156
|
+
results["account_id"] = simple_creds["account_id"]
|
|
157
|
+
if "api_key" in results and "account_id" in results:
|
|
158
|
+
return results
|
|
159
|
+
except Exception as e:
|
|
160
|
+
logger.warning("Error during simple parsing of %s: %s", str(auth_ini_path), e)
|
|
161
|
+
|
|
162
|
+
# 2) ConfigParser for any missing keys
|
|
163
|
+
try:
|
|
164
|
+
config = configparser.ConfigParser()
|
|
165
|
+
config.read(auth_ini_path)
|
|
166
|
+
for key_name in ("api_key", "account_id"):
|
|
167
|
+
if key_name in results and results[key_name]:
|
|
168
|
+
continue
|
|
169
|
+
if "fireworks" in config and config.has_option("fireworks", key_name):
|
|
170
|
+
value_from_file = config.get("fireworks", key_name)
|
|
171
|
+
if value_from_file:
|
|
172
|
+
results[key_name] = value_from_file
|
|
173
|
+
continue
|
|
174
|
+
if config.has_option(config.default_section, key_name):
|
|
175
|
+
value_from_default = config.get(config.default_section, key_name)
|
|
176
|
+
if value_from_default:
|
|
177
|
+
results[key_name] = value_from_default
|
|
178
|
+
except configparser.MissingSectionHeaderError:
|
|
179
|
+
# Purely key=value file without section headers; simple parsing should have handled it already.
|
|
180
|
+
logger.debug("%s has no section headers; falling back to simple parsing results.", str(auth_ini_path))
|
|
181
|
+
except configparser.Error as e_config:
|
|
182
|
+
logger.warning("Configparser error reading %s: %s", str(auth_ini_path), e_config)
|
|
183
|
+
except Exception as e_general:
|
|
184
|
+
logger.warning("Unexpected error reading %s: %s", str(auth_ini_path), e_general)
|
|
185
|
+
|
|
186
|
+
return results
|
|
187
|
+
|
|
188
|
+
|
|
139
189
|
def get_fireworks_api_key() -> Optional[str]:
|
|
140
190
|
"""
|
|
141
191
|
Retrieves the Fireworks API key.
|
|
@@ -177,13 +227,15 @@ def get_fireworks_account_id() -> Optional[str]:
|
|
|
177
227
|
The Account ID is sourced in the following order:
|
|
178
228
|
1. FIREWORKS_ACCOUNT_ID environment variable.
|
|
179
229
|
2. 'account_id' from the [fireworks] section of ~/.fireworks/auth.ini.
|
|
230
|
+
3. If an API key is available (env or auth.ini), resolve via verifyApiKey.
|
|
180
231
|
|
|
181
232
|
Returns:
|
|
182
233
|
The Account ID if found, otherwise None.
|
|
183
234
|
"""
|
|
184
235
|
# If a profile is active, prefer profile file first, then env
|
|
185
236
|
if _is_profile_active():
|
|
186
|
-
|
|
237
|
+
creds = _get_credentials_from_config_file()
|
|
238
|
+
account_id_from_file = creds.get("account_id")
|
|
187
239
|
if account_id_from_file:
|
|
188
240
|
return account_id_from_file
|
|
189
241
|
account_id = os.environ.get("FIREWORKS_ACCOUNT_ID")
|
|
@@ -196,11 +248,24 @@ def get_fireworks_account_id() -> Optional[str]:
|
|
|
196
248
|
if account_id:
|
|
197
249
|
logger.debug("Using FIREWORKS_ACCOUNT_ID from environment variable.")
|
|
198
250
|
return account_id
|
|
199
|
-
|
|
251
|
+
creds = _get_credentials_from_config_file()
|
|
252
|
+
account_id_from_file = creds.get("account_id")
|
|
200
253
|
if account_id_from_file:
|
|
201
254
|
return account_id_from_file
|
|
202
255
|
|
|
203
|
-
|
|
256
|
+
# 3) Fallback: if API key is present, attempt to resolve via verifyApiKey (env or auth.ini)
|
|
257
|
+
try:
|
|
258
|
+
# Intentionally use get_fireworks_api_key to centralize precedence (env vs file)
|
|
259
|
+
api_key_for_verify = get_fireworks_api_key()
|
|
260
|
+
if api_key_for_verify:
|
|
261
|
+
resolved = verify_api_key_and_get_account_id(api_key=api_key_for_verify, api_base=get_fireworks_api_base())
|
|
262
|
+
if resolved:
|
|
263
|
+
logger.debug("Using FIREWORKS_ACCOUNT_ID resolved via verifyApiKey: %s", resolved)
|
|
264
|
+
return resolved
|
|
265
|
+
except Exception as e:
|
|
266
|
+
logger.debug("Failed to resolve FIREWORKS_ACCOUNT_ID via verifyApiKey: %s", e)
|
|
267
|
+
|
|
268
|
+
logger.debug("Fireworks Account ID not found in environment variables, auth.ini, or via verifyApiKey.")
|
|
204
269
|
return None
|
|
205
270
|
|
|
206
271
|
|
|
@@ -242,9 +307,16 @@ def verify_api_key_and_get_account_id(
|
|
|
242
307
|
if not resolved_key:
|
|
243
308
|
return None
|
|
244
309
|
resolved_base = api_base or get_fireworks_api_base()
|
|
310
|
+
|
|
311
|
+
from .common_utils import get_user_agent
|
|
312
|
+
|
|
245
313
|
url = f"{resolved_base.rstrip('/')}/verifyApiKey"
|
|
246
|
-
headers = {
|
|
314
|
+
headers = {
|
|
315
|
+
"Authorization": f"Bearer {resolved_key}",
|
|
316
|
+
"User-Agent": get_user_agent(),
|
|
317
|
+
}
|
|
247
318
|
resp = requests.get(url, headers=headers, timeout=10)
|
|
319
|
+
|
|
248
320
|
if resp.status_code != 200:
|
|
249
321
|
logger.debug("verifyApiKey returned status %s", resp.status_code)
|
|
250
322
|
return None
|
{eval_protocol-0.2.64.dev1 → eval_protocol-0.2.84.dev1}/eval_protocol/benchmarks/test_frozen_lake.py
RENAMED
|
@@ -46,7 +46,7 @@ def frozen_lake_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evaluation
|
|
|
46
46
|
num_runs=1,
|
|
47
47
|
max_concurrent_rollouts=3,
|
|
48
48
|
mode="pointwise",
|
|
49
|
-
server_script_path="
|
|
49
|
+
server_script_path="eval_protocol/mcp_servers/frozen_lake/server.py",
|
|
50
50
|
)
|
|
51
51
|
def test_frozen_lake_evaluation(row: EvaluationRow) -> EvaluationRow:
|
|
52
52
|
"""
|
|
@@ -355,6 +355,77 @@ def parse_args(args=None):
|
|
|
355
355
|
action="store_true",
|
|
356
356
|
help="Non-interactive: upload all discovered evaluation tests",
|
|
357
357
|
)
|
|
358
|
+
upload_parser.add_argument(
|
|
359
|
+
"--env-file",
|
|
360
|
+
help="Path to .env file containing secrets to upload (default: .env in current directory)",
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
# Create command group
|
|
364
|
+
create_parser = subparsers.add_parser(
|
|
365
|
+
"create",
|
|
366
|
+
help="Resource creation commands",
|
|
367
|
+
)
|
|
368
|
+
create_subparsers = create_parser.add_subparsers(dest="create_command")
|
|
369
|
+
rft_parser = create_subparsers.add_parser(
|
|
370
|
+
"rft",
|
|
371
|
+
help="Create a Reinforcement Fine-tuning Job on Fireworks",
|
|
372
|
+
)
|
|
373
|
+
rft_parser.add_argument(
|
|
374
|
+
"--evaluator-id",
|
|
375
|
+
help="Evaluator ID used during upload; if omitted, derive from local traces or a single discovered test",
|
|
376
|
+
)
|
|
377
|
+
# Dataset options
|
|
378
|
+
rft_parser.add_argument(
|
|
379
|
+
"--dataset-id",
|
|
380
|
+
help="Use existing Fireworks dataset id (skip local materialization)",
|
|
381
|
+
)
|
|
382
|
+
rft_parser.add_argument(
|
|
383
|
+
"--dataset-jsonl",
|
|
384
|
+
help="Path to JSONL to upload as a new Fireworks dataset",
|
|
385
|
+
)
|
|
386
|
+
rft_parser.add_argument(
|
|
387
|
+
"--dataset-builder",
|
|
388
|
+
help="Explicit dataset builder spec (module::function or path::function)",
|
|
389
|
+
)
|
|
390
|
+
rft_parser.add_argument(
|
|
391
|
+
"--dataset-display-name",
|
|
392
|
+
help="Display name for dataset on Fireworks (defaults to dataset id)",
|
|
393
|
+
)
|
|
394
|
+
# Training config and evaluator/job settings
|
|
395
|
+
rft_parser.add_argument("--base-model", help="Base model resource id")
|
|
396
|
+
rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from")
|
|
397
|
+
rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)")
|
|
398
|
+
rft_parser.add_argument("--epochs", type=int, default=1)
|
|
399
|
+
rft_parser.add_argument("--batch-size", type=int, default=128000)
|
|
400
|
+
rft_parser.add_argument("--learning-rate", type=float, default=3e-5)
|
|
401
|
+
rft_parser.add_argument("--max-context-length", type=int, default=65536)
|
|
402
|
+
rft_parser.add_argument("--lora-rank", type=int, default=16)
|
|
403
|
+
rft_parser.add_argument("--accelerator-count", type=int, default=1)
|
|
404
|
+
rft_parser.add_argument("--region", help="Fireworks region enum value")
|
|
405
|
+
rft_parser.add_argument("--display-name", help="RFT job display name")
|
|
406
|
+
rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id")
|
|
407
|
+
rft_parser.add_argument("--eval-auto-carveout", dest="eval_auto_carveout", action="store_true", default=True)
|
|
408
|
+
rft_parser.add_argument("--no-eval-auto-carveout", dest="eval_auto_carveout", action="store_false")
|
|
409
|
+
# Rollout chunking
|
|
410
|
+
rft_parser.add_argument("--chunk-size", type=int, default=10, help="Data chunk size for rollout batching")
|
|
411
|
+
# Inference params
|
|
412
|
+
rft_parser.add_argument("--temperature", type=float)
|
|
413
|
+
rft_parser.add_argument("--top-p", type=float)
|
|
414
|
+
rft_parser.add_argument("--top-k", type=int)
|
|
415
|
+
rft_parser.add_argument("--max-tokens", type=int, default=32768)
|
|
416
|
+
rft_parser.add_argument("--n", type=int, default=8)
|
|
417
|
+
rft_parser.add_argument("--inference-extra-body", help="JSON string for extra inference params")
|
|
418
|
+
# Wandb
|
|
419
|
+
rft_parser.add_argument("--wandb-enabled", action="store_true")
|
|
420
|
+
rft_parser.add_argument("--wandb-project")
|
|
421
|
+
rft_parser.add_argument("--wandb-entity")
|
|
422
|
+
rft_parser.add_argument("--wandb-run-id")
|
|
423
|
+
rft_parser.add_argument("--wandb-api-key")
|
|
424
|
+
# Misc
|
|
425
|
+
rft_parser.add_argument("--rft-job-id", help="Specify an explicit RFT job id")
|
|
426
|
+
rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
|
|
427
|
+
rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending")
|
|
428
|
+
rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
|
|
358
429
|
|
|
359
430
|
# Run command (for Hydra-based evaluations)
|
|
360
431
|
# This subparser intentionally defines no arguments itself.
|
|
@@ -481,6 +552,13 @@ def main():
|
|
|
481
552
|
from .cli_commands.upload import upload_command
|
|
482
553
|
|
|
483
554
|
return upload_command(args)
|
|
555
|
+
elif args.command == "create":
|
|
556
|
+
if args.create_command == "rft":
|
|
557
|
+
from .cli_commands.create_rft import create_rft_command
|
|
558
|
+
|
|
559
|
+
return create_rft_command(args)
|
|
560
|
+
print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
|
|
561
|
+
return 1
|
|
484
562
|
elif args.command == "run":
|
|
485
563
|
# For the 'run' command, Hydra takes over argument parsing.
|
|
486
564
|
|