eval-protocol 0.2.78__tar.gz → 0.2.80__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.78/eval_protocol.egg-info → eval_protocol-0.2.80}/PKG-INFO +1 -1
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/fireworks_tracing.py +12 -2
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/auth.py +8 -1
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli.py +4 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/create_rft.py +82 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/upload.py +66 -15
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/common_utils.py +17 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/evaluation.py +3 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/fireworks_rft.py +17 -3
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generation/clients.py +3 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/platform_api.py +18 -10
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/handle_persist_flow.py +15 -8
- {eval_protocol-0.2.78 → eval_protocol-0.2.80/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/LICENSE +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/README.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/exceptions.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/tracing_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/pyproject.toml +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/setup.cfg +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/setup.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_config.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_exceptions.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_format.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_length.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_math.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_message_field_filtering.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_models.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/versioneer.py +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/index-BGlGI2LH.css +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/index-CnGlFAnP.js +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/index-CnGlFAnP.js.map +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.78 → eval_protocol-0.2.80}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.78
|
|
3
|
+
Version: 0.2.80
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-11-
|
|
11
|
+
"date": "2025-11-04T15:41:02-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.78"
|
|
14
|
+
"full-revisionid": "9303a224e55ef4f6e47aaa9e9e596ebd1c83cc56",
|
|
15
|
+
"version": "0.2.80"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -273,7 +273,12 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
273
273
|
if not tags:
|
|
274
274
|
raise ValueError("At least one tag is required to fetch logs")
|
|
275
275
|
|
|
276
|
-
|
|
276
|
+
from ..common_utils import get_user_agent
|
|
277
|
+
|
|
278
|
+
headers = {
|
|
279
|
+
"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
|
|
280
|
+
"User-Agent": get_user_agent(),
|
|
281
|
+
}
|
|
277
282
|
params: Dict[str, Any] = {"tags": tags, "limit": limit, "hours_back": hours_back, "program": "eval_protocol"}
|
|
278
283
|
|
|
279
284
|
# Try /logs first, fall back to /v1/logs if not found
|
|
@@ -398,7 +403,12 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
398
403
|
else:
|
|
399
404
|
url = f"{self.base_url}/v1/traces/pointwise"
|
|
400
405
|
|
|
401
|
-
|
|
406
|
+
from ..common_utils import get_user_agent
|
|
407
|
+
|
|
408
|
+
headers = {
|
|
409
|
+
"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}",
|
|
410
|
+
"User-Agent": get_user_agent(),
|
|
411
|
+
}
|
|
402
412
|
|
|
403
413
|
result = None
|
|
404
414
|
try:
|
|
@@ -242,9 +242,16 @@ def verify_api_key_and_get_account_id(
|
|
|
242
242
|
if not resolved_key:
|
|
243
243
|
return None
|
|
244
244
|
resolved_base = api_base or get_fireworks_api_base()
|
|
245
|
+
|
|
246
|
+
from .common_utils import get_user_agent
|
|
247
|
+
|
|
245
248
|
url = f"{resolved_base.rstrip('/')}/verifyApiKey"
|
|
246
|
-
headers = {
|
|
249
|
+
headers = {
|
|
250
|
+
"Authorization": f"Bearer {resolved_key}",
|
|
251
|
+
"User-Agent": get_user_agent(),
|
|
252
|
+
}
|
|
247
253
|
resp = requests.get(url, headers=headers, timeout=10)
|
|
254
|
+
|
|
248
255
|
if resp.status_code != 200:
|
|
249
256
|
logger.debug("verifyApiKey returned status %s", resp.status_code)
|
|
250
257
|
return None
|
|
@@ -355,6 +355,10 @@ def parse_args(args=None):
|
|
|
355
355
|
action="store_true",
|
|
356
356
|
help="Non-interactive: upload all discovered evaluation tests",
|
|
357
357
|
)
|
|
358
|
+
upload_parser.add_argument(
|
|
359
|
+
"--env-file",
|
|
360
|
+
help="Path to .env file containing secrets to upload (default: .env in current directory)",
|
|
361
|
+
)
|
|
358
362
|
|
|
359
363
|
# Create command group
|
|
360
364
|
create_parser = subparsers.add_parser(
|
|
@@ -5,12 +5,15 @@ import time
|
|
|
5
5
|
import argparse
|
|
6
6
|
from typing import Any, Dict, Optional
|
|
7
7
|
|
|
8
|
+
import requests
|
|
9
|
+
|
|
8
10
|
from ..auth import (
|
|
9
11
|
get_fireworks_account_id,
|
|
10
12
|
get_fireworks_api_base,
|
|
11
13
|
get_fireworks_api_key,
|
|
12
14
|
verify_api_key_and_get_account_id,
|
|
13
15
|
)
|
|
16
|
+
from ..common_utils import get_user_agent
|
|
14
17
|
from ..fireworks_rft import (
|
|
15
18
|
_map_api_host_to_app_host,
|
|
16
19
|
build_default_output_model,
|
|
@@ -263,6 +266,67 @@ def _auto_select_evaluator_id(cwd: str) -> Optional[str]:
|
|
|
263
266
|
return None
|
|
264
267
|
|
|
265
268
|
|
|
269
|
+
def _poll_evaluator_status(
|
|
270
|
+
evaluator_resource_name: str, api_key: str, api_base: str, timeout_minutes: int = 5
|
|
271
|
+
) -> bool:
|
|
272
|
+
"""
|
|
273
|
+
Poll evaluator status until it becomes ACTIVE or times out.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
evaluator_resource_name: Full evaluator resource name (e.g., accounts/xxx/evaluators/yyy)
|
|
277
|
+
api_key: Fireworks API key
|
|
278
|
+
api_base: Fireworks API base URL
|
|
279
|
+
timeout_minutes: Maximum time to wait in minutes
|
|
280
|
+
|
|
281
|
+
Returns:
|
|
282
|
+
True if evaluator becomes ACTIVE, False if timeout or BUILD_FAILED
|
|
283
|
+
"""
|
|
284
|
+
headers = {
|
|
285
|
+
"Authorization": f"Bearer {api_key}",
|
|
286
|
+
"Content-Type": "application/json",
|
|
287
|
+
"User-Agent": get_user_agent(),
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
check_url = f"{api_base}/v1/{evaluator_resource_name}"
|
|
291
|
+
timeout_seconds = timeout_minutes * 60
|
|
292
|
+
poll_interval = 10 # seconds
|
|
293
|
+
start_time = time.time()
|
|
294
|
+
|
|
295
|
+
print(f"Polling evaluator status (timeout: {timeout_minutes}m, interval: {poll_interval}s)...")
|
|
296
|
+
|
|
297
|
+
while time.time() - start_time < timeout_seconds:
|
|
298
|
+
try:
|
|
299
|
+
response = requests.get(check_url, headers=headers, timeout=30)
|
|
300
|
+
response.raise_for_status()
|
|
301
|
+
|
|
302
|
+
evaluator_data = response.json()
|
|
303
|
+
state = evaluator_data.get("state", "STATE_UNSPECIFIED")
|
|
304
|
+
status = evaluator_data.get("status", "")
|
|
305
|
+
|
|
306
|
+
if state == "ACTIVE":
|
|
307
|
+
print("✅ Evaluator is ACTIVE and ready!")
|
|
308
|
+
return True
|
|
309
|
+
elif state == "BUILD_FAILED":
|
|
310
|
+
print(f"❌ Evaluator build failed. Status: {status}")
|
|
311
|
+
return False
|
|
312
|
+
elif state == "BUILDING":
|
|
313
|
+
elapsed_minutes = (time.time() - start_time) / 60
|
|
314
|
+
print(f"⏳ Evaluator is still building... ({elapsed_minutes:.1f}m elapsed)")
|
|
315
|
+
else:
|
|
316
|
+
print(f"⏳ Evaluator state: {state}, status: {status}")
|
|
317
|
+
|
|
318
|
+
except requests.exceptions.RequestException as e:
|
|
319
|
+
print(f"Warning: Failed to check evaluator status: {e}")
|
|
320
|
+
|
|
321
|
+
# Wait before next poll
|
|
322
|
+
time.sleep(poll_interval)
|
|
323
|
+
|
|
324
|
+
# Timeout reached
|
|
325
|
+
elapsed_minutes = (time.time() - start_time) / 60
|
|
326
|
+
print(f"⏰ Timeout after {elapsed_minutes:.1f}m - evaluator is not yet ACTIVE")
|
|
327
|
+
return False
|
|
328
|
+
|
|
329
|
+
|
|
266
330
|
def create_rft_command(args) -> int:
|
|
267
331
|
evaluator_id: Optional[str] = getattr(args, "evaluator_id", None)
|
|
268
332
|
non_interactive: bool = bool(getattr(args, "yes", False))
|
|
@@ -328,10 +392,28 @@ def create_rft_command(args) -> int:
|
|
|
328
392
|
description=None,
|
|
329
393
|
force=False,
|
|
330
394
|
yes=True,
|
|
395
|
+
env_file=None, # Add the new env_file parameter
|
|
331
396
|
)
|
|
332
397
|
rc = upload_command(upload_args)
|
|
333
398
|
if rc == 0:
|
|
334
399
|
print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
|
|
400
|
+
|
|
401
|
+
# Poll for evaluator status
|
|
402
|
+
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
|
|
403
|
+
is_active = _poll_evaluator_status(
|
|
404
|
+
evaluator_resource_name=evaluator_resource_name, api_key=api_key, api_base=api_base, timeout_minutes=5
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
if not is_active:
|
|
408
|
+
# Print helpful message with dashboard link
|
|
409
|
+
app_base = _map_api_host_to_app_host(api_base)
|
|
410
|
+
evaluator_slug = _extract_terminal_segment(evaluator_id)
|
|
411
|
+
dashboard_url = f"{app_base}/dashboard/evaluators/{evaluator_slug}"
|
|
412
|
+
|
|
413
|
+
print("\n❌ Evaluator is not ready within the timeout period.")
|
|
414
|
+
print(f"📊 Please check the evaluator status at: {dashboard_url}")
|
|
415
|
+
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
|
|
416
|
+
return 1
|
|
335
417
|
else:
|
|
336
418
|
print("Warning: Evaluator upload did not complete successfully; proceeding to RFT creation.")
|
|
337
419
|
except Exception as e:
|
|
@@ -9,7 +9,7 @@ import runpy
|
|
|
9
9
|
import sys
|
|
10
10
|
from dataclasses import dataclass
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import Any,
|
|
12
|
+
from typing import Any, Dict, Iterable
|
|
13
13
|
|
|
14
14
|
import pytest
|
|
15
15
|
from eval_protocol.auth import (
|
|
@@ -551,6 +551,35 @@ def _prompt_select(tests: list[DiscoveredTest], non_interactive: bool) -> list[D
|
|
|
551
551
|
return _prompt_select_interactive(tests)
|
|
552
552
|
|
|
553
553
|
|
|
554
|
+
def _load_secrets_from_env_file(env_file_path: str) -> Dict[str, str]:
|
|
555
|
+
"""
|
|
556
|
+
Load secrets from a .env file that should be uploaded to Fireworks.
|
|
557
|
+
|
|
558
|
+
Returns a dictionary of secret key-value pairs that contain 'API_KEY' in the name.
|
|
559
|
+
"""
|
|
560
|
+
if not os.path.exists(env_file_path):
|
|
561
|
+
return {}
|
|
562
|
+
|
|
563
|
+
# Load the .env file into a temporary environment
|
|
564
|
+
env_vars = {}
|
|
565
|
+
with open(env_file_path, "r") as f:
|
|
566
|
+
for line in f:
|
|
567
|
+
line = line.strip()
|
|
568
|
+
if line and not line.startswith("#") and "=" in line:
|
|
569
|
+
key, value = line.split("=", 1)
|
|
570
|
+
key = key.strip()
|
|
571
|
+
value = value.strip().strip('"').strip("'") # Remove quotes
|
|
572
|
+
env_vars[key] = value
|
|
573
|
+
|
|
574
|
+
# Filter for secrets that look like API keys
|
|
575
|
+
secrets = {}
|
|
576
|
+
for key, value in env_vars.items():
|
|
577
|
+
if "API_KEY" in key.upper() and value:
|
|
578
|
+
secrets[key] = value
|
|
579
|
+
|
|
580
|
+
return secrets
|
|
581
|
+
|
|
582
|
+
|
|
554
583
|
def upload_command(args: argparse.Namespace) -> int:
|
|
555
584
|
root = os.path.abspath(getattr(args, "path", "."))
|
|
556
585
|
entries_arg = getattr(args, "entry", None)
|
|
@@ -585,11 +614,27 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
585
614
|
display_name = getattr(args, "display_name", None)
|
|
586
615
|
description = getattr(args, "description", None)
|
|
587
616
|
force = bool(getattr(args, "force", False))
|
|
617
|
+
env_file = getattr(args, "env_file", None)
|
|
588
618
|
|
|
589
|
-
#
|
|
619
|
+
# Load secrets from .env file and ensure they're available on Fireworks
|
|
590
620
|
try:
|
|
591
621
|
fw_account_id = get_fireworks_account_id()
|
|
622
|
+
|
|
623
|
+
# Determine .env file path
|
|
624
|
+
if env_file:
|
|
625
|
+
env_file_path = env_file
|
|
626
|
+
else:
|
|
627
|
+
env_file_path = os.path.join(root, ".env")
|
|
628
|
+
|
|
629
|
+
# Load secrets from .env file
|
|
630
|
+
secrets_from_file = _load_secrets_from_env_file(env_file_path)
|
|
631
|
+
secrets_from_env_file = secrets_from_file.copy() # Track what came from .env file
|
|
632
|
+
|
|
633
|
+
# Also ensure FIREWORKS_API_KEY from environment is included
|
|
592
634
|
fw_api_key_value = get_fireworks_api_key()
|
|
635
|
+
if fw_api_key_value:
|
|
636
|
+
secrets_from_file["FIREWORKS_API_KEY"] = fw_api_key_value
|
|
637
|
+
|
|
593
638
|
if not fw_account_id and fw_api_key_value:
|
|
594
639
|
# Attempt to verify and resolve account id from server headers
|
|
595
640
|
resolved = verify_api_key_and_get_account_id(api_key=fw_api_key_value, api_base=get_fireworks_api_base())
|
|
@@ -598,21 +643,27 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
598
643
|
# Propagate to environment so downstream calls use it if needed
|
|
599
644
|
os.environ["FIREWORKS_ACCOUNT_ID"] = fw_account_id
|
|
600
645
|
print(f"Resolved FIREWORKS_ACCOUNT_ID via API verification: {fw_account_id}")
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
):
|
|
608
|
-
print("
|
|
609
|
-
|
|
610
|
-
|
|
646
|
+
|
|
647
|
+
if fw_account_id and secrets_from_file:
|
|
648
|
+
print(f"Found {len(secrets_from_file)} API keys to upload as Fireworks secrets...")
|
|
649
|
+
if secrets_from_env_file and os.path.exists(env_file_path):
|
|
650
|
+
print(f"Loading secrets from: {env_file_path}")
|
|
651
|
+
|
|
652
|
+
for secret_name, secret_value in secrets_from_file.items():
|
|
653
|
+
print(f"Ensuring {secret_name} is registered as a secret on Fireworks for rollout...")
|
|
654
|
+
if create_or_update_fireworks_secret(
|
|
655
|
+
account_id=fw_account_id,
|
|
656
|
+
key_name=secret_name,
|
|
657
|
+
secret_value=secret_value,
|
|
658
|
+
):
|
|
659
|
+
print(f"✓ {secret_name} secret created/updated on Fireworks.")
|
|
660
|
+
else:
|
|
661
|
+
print(f"Warning: Failed to create/update {secret_name} secret on Fireworks.")
|
|
611
662
|
else:
|
|
612
663
|
if not fw_account_id:
|
|
613
|
-
print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register
|
|
614
|
-
if not
|
|
615
|
-
print("Warning:
|
|
664
|
+
print("Warning: FIREWORKS_ACCOUNT_ID not found; cannot register secrets.")
|
|
665
|
+
if not secrets_from_file:
|
|
666
|
+
print("Warning: No API keys found in environment or .env file; no secrets to register.")
|
|
616
667
|
except Exception as e:
|
|
617
668
|
print(f"Warning: Skipped Fireworks secret registration due to error: {e}")
|
|
618
669
|
|
|
@@ -5,6 +5,23 @@ from typing import Any, Dict, List
|
|
|
5
5
|
import requests
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def get_user_agent() -> str:
|
|
9
|
+
"""
|
|
10
|
+
Returns the user-agent string for eval-protocol CLI requests.
|
|
11
|
+
|
|
12
|
+
Format: eval-protocol-cli/{version}
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
User-agent string identifying the eval-protocol CLI and version.
|
|
16
|
+
"""
|
|
17
|
+
try:
|
|
18
|
+
from . import __version__
|
|
19
|
+
|
|
20
|
+
return f"eval-protocol/{__version__}"
|
|
21
|
+
except Exception:
|
|
22
|
+
return "eval-protocol/unknown"
|
|
23
|
+
|
|
24
|
+
|
|
8
25
|
def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
|
|
9
26
|
"""
|
|
10
27
|
Reads a JSONL file where each line is a valid JSON object and returns a list of these objects.
|
|
@@ -20,6 +20,7 @@ from eval_protocol.auth import (
|
|
|
20
20
|
get_fireworks_api_key,
|
|
21
21
|
verify_api_key_and_get_account_id,
|
|
22
22
|
)
|
|
23
|
+
from eval_protocol.common_utils import get_user_agent
|
|
23
24
|
from eval_protocol.typed_interface import EvaluationMode
|
|
24
25
|
|
|
25
26
|
from eval_protocol.get_pep440_version import get_pep440_version
|
|
@@ -405,6 +406,7 @@ class Evaluator:
|
|
|
405
406
|
headers = {
|
|
406
407
|
"Authorization": f"Bearer {auth_token}",
|
|
407
408
|
"Content-Type": "application/json",
|
|
409
|
+
"User-Agent": get_user_agent(),
|
|
408
410
|
}
|
|
409
411
|
logger.info(f"Previewing evaluator using API endpoint: {url} with account: {account_id}")
|
|
410
412
|
logger.debug(f"Preview API Request URL: {url}")
|
|
@@ -748,6 +750,7 @@ class Evaluator:
|
|
|
748
750
|
headers = {
|
|
749
751
|
"Authorization": f"Bearer {auth_token}",
|
|
750
752
|
"Content-Type": "application/json",
|
|
753
|
+
"User-Agent": get_user_agent(),
|
|
751
754
|
}
|
|
752
755
|
|
|
753
756
|
self._ensure_requirements_present(os.getcwd())
|
|
@@ -11,6 +11,7 @@ from typing import Any, Callable, Dict, Iterable, Optional, Tuple
|
|
|
11
11
|
import requests
|
|
12
12
|
|
|
13
13
|
from .auth import get_fireworks_account_id, get_fireworks_api_base, get_fireworks_api_key
|
|
14
|
+
from .common_utils import get_user_agent
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def _map_api_host_to_app_host(api_base: str) -> str:
|
|
@@ -157,12 +158,17 @@ def create_dataset_from_jsonl(
|
|
|
157
158
|
display_name: Optional[str],
|
|
158
159
|
jsonl_path: str,
|
|
159
160
|
) -> Tuple[str, Dict[str, Any]]:
|
|
160
|
-
headers = {
|
|
161
|
+
headers = {
|
|
162
|
+
"Authorization": f"Bearer {api_key}",
|
|
163
|
+
"Content-Type": "application/json",
|
|
164
|
+
"User-Agent": get_user_agent(),
|
|
165
|
+
}
|
|
161
166
|
# Count examples quickly
|
|
162
167
|
example_count = 0
|
|
163
168
|
with open(jsonl_path, "r", encoding="utf-8") as f:
|
|
164
169
|
for _ in f:
|
|
165
170
|
example_count += 1
|
|
171
|
+
|
|
166
172
|
dataset_url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/datasets"
|
|
167
173
|
payload = {
|
|
168
174
|
"dataset": {
|
|
@@ -181,7 +187,10 @@ def create_dataset_from_jsonl(
|
|
|
181
187
|
upload_url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/datasets/{dataset_id}:upload"
|
|
182
188
|
with open(jsonl_path, "rb") as f:
|
|
183
189
|
files = {"file": f}
|
|
184
|
-
up_headers = {
|
|
190
|
+
up_headers = {
|
|
191
|
+
"Authorization": f"Bearer {api_key}",
|
|
192
|
+
"User-Agent": get_user_agent(),
|
|
193
|
+
}
|
|
185
194
|
up_resp = requests.post(upload_url, files=files, headers=up_headers, timeout=600)
|
|
186
195
|
if up_resp.status_code not in (200, 201):
|
|
187
196
|
raise RuntimeError(f"Dataset upload failed: {up_resp.status_code} {up_resp.text}")
|
|
@@ -195,7 +204,12 @@ def create_reinforcement_fine_tuning_job(
|
|
|
195
204
|
body: Dict[str, Any],
|
|
196
205
|
) -> Dict[str, Any]:
|
|
197
206
|
url = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/reinforcementFineTuningJobs"
|
|
198
|
-
headers = {
|
|
207
|
+
headers = {
|
|
208
|
+
"Authorization": f"Bearer {api_key}",
|
|
209
|
+
"Content-Type": "application/json",
|
|
210
|
+
"Accept": "application/json",
|
|
211
|
+
"User-Agent": get_user_agent(),
|
|
212
|
+
}
|
|
199
213
|
resp = requests.post(url, json=body, headers=headers, timeout=60)
|
|
200
214
|
if resp.status_code not in (200, 201):
|
|
201
215
|
raise RuntimeError(f"RFT job creation failed: {resp.status_code} {resp.text}")
|
|
@@ -13,6 +13,8 @@ import aiohttp
|
|
|
13
13
|
from omegaconf import DictConfig
|
|
14
14
|
from pydantic import BaseModel # Added for new models
|
|
15
15
|
|
|
16
|
+
from ..common_utils import get_user_agent
|
|
17
|
+
|
|
16
18
|
logger = logging.getLogger(__name__)
|
|
17
19
|
|
|
18
20
|
|
|
@@ -101,6 +103,7 @@ class FireworksModelClient(ModelClient):
|
|
|
101
103
|
"Authorization": f"Bearer {self.api_key}",
|
|
102
104
|
"Content-Type": "application/json",
|
|
103
105
|
"Accept": "application/json",
|
|
106
|
+
"User-Agent": get_user_agent(),
|
|
104
107
|
}
|
|
105
108
|
|
|
106
109
|
debug_payload_log = json.loads(json.dumps(payload))
|
|
@@ -11,6 +11,7 @@ from eval_protocol.auth import (
|
|
|
11
11
|
get_fireworks_api_base,
|
|
12
12
|
get_fireworks_api_key,
|
|
13
13
|
)
|
|
14
|
+
from eval_protocol.common_utils import get_user_agent
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
16
17
|
|
|
@@ -95,6 +96,7 @@ def create_or_update_fireworks_secret(
|
|
|
95
96
|
headers = {
|
|
96
97
|
"Authorization": f"Bearer {resolved_api_key}",
|
|
97
98
|
"Content-Type": "application/json",
|
|
99
|
+
"User-Agent": get_user_agent(),
|
|
98
100
|
}
|
|
99
101
|
|
|
100
102
|
# The secret_id for GET/PATCH/DELETE operations is the key_name.
|
|
@@ -107,10 +109,10 @@ def create_or_update_fireworks_secret(
|
|
|
107
109
|
|
|
108
110
|
# Check if secret exists using GET (path uses normalized resource id)
|
|
109
111
|
resource_id = _normalize_secret_resource_id(key_name)
|
|
110
|
-
get_url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
111
112
|
secret_exists = False
|
|
112
113
|
try:
|
|
113
|
-
|
|
114
|
+
url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
115
|
+
response = requests.get(url, headers=headers, timeout=10)
|
|
114
116
|
if response.status_code == 200:
|
|
115
117
|
secret_exists = True
|
|
116
118
|
logger.info(f"Secret '{key_name}' already exists. Will attempt to update.")
|
|
@@ -131,7 +133,6 @@ def create_or_update_fireworks_secret(
|
|
|
131
133
|
|
|
132
134
|
if secret_exists:
|
|
133
135
|
# Update existing secret (PATCH)
|
|
134
|
-
patch_url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
135
136
|
# Body for PATCH requires 'keyName' and 'value'.
|
|
136
137
|
# Transform key_name for payload: uppercase and underscores
|
|
137
138
|
payload_key_name = key_name.upper().replace("-", "_")
|
|
@@ -146,7 +147,8 @@ def create_or_update_fireworks_secret(
|
|
|
146
147
|
payload = {"keyName": payload_key_name, "value": secret_value}
|
|
147
148
|
try:
|
|
148
149
|
logger.debug(f"PATCH payload for '{key_name}': {payload}")
|
|
149
|
-
|
|
150
|
+
url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
151
|
+
response = requests.patch(url, json=payload, headers=headers, timeout=30)
|
|
150
152
|
response.raise_for_status()
|
|
151
153
|
logger.info(f"Successfully updated secret '{key_name}' on Fireworks platform.")
|
|
152
154
|
return True
|
|
@@ -158,7 +160,6 @@ def create_or_update_fireworks_secret(
|
|
|
158
160
|
return False
|
|
159
161
|
else:
|
|
160
162
|
# Create new secret (POST)
|
|
161
|
-
post_url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets"
|
|
162
163
|
# Body for POST is gatewaySecret. 'name' field in payload is the resource path.
|
|
163
164
|
# Let's assume for POST, the 'name' in payload can be omitted or is the key_name.
|
|
164
165
|
# The API should ideally use 'keyName' from URL or a specific 'secretId' in payload for creation if 'name' is server-assigned.
|
|
@@ -183,7 +184,8 @@ def create_or_update_fireworks_secret(
|
|
|
183
184
|
}
|
|
184
185
|
try:
|
|
185
186
|
logger.debug(f"POST payload for '{key_name}': {payload}")
|
|
186
|
-
|
|
187
|
+
url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets"
|
|
188
|
+
response = requests.post(url, json=payload, headers=headers, timeout=30)
|
|
187
189
|
response.raise_for_status()
|
|
188
190
|
logger.info(
|
|
189
191
|
f"Successfully created secret '{key_name}' on Fireworks platform. Full name: {response.json().get('name')}"
|
|
@@ -217,11 +219,14 @@ def get_fireworks_secret(
|
|
|
217
219
|
logger.error("Missing Fireworks API key, base URL, or account ID for getting secret.")
|
|
218
220
|
return None
|
|
219
221
|
|
|
220
|
-
headers = {
|
|
222
|
+
headers = {
|
|
223
|
+
"Authorization": f"Bearer {resolved_api_key}",
|
|
224
|
+
"User-Agent": get_user_agent(),
|
|
225
|
+
}
|
|
221
226
|
resource_id = _normalize_secret_resource_id(key_name)
|
|
222
|
-
url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
223
227
|
|
|
224
228
|
try:
|
|
229
|
+
url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
225
230
|
response = requests.get(url, headers=headers, timeout=10)
|
|
226
231
|
if response.status_code == 200:
|
|
227
232
|
logger.info(f"Successfully retrieved secret '{key_name}'.")
|
|
@@ -254,11 +259,14 @@ def delete_fireworks_secret(
|
|
|
254
259
|
logger.error("Missing Fireworks API key, base URL, or account ID for deleting secret.")
|
|
255
260
|
return False
|
|
256
261
|
|
|
257
|
-
headers = {
|
|
262
|
+
headers = {
|
|
263
|
+
"Authorization": f"Bearer {resolved_api_key}",
|
|
264
|
+
"User-Agent": get_user_agent(),
|
|
265
|
+
}
|
|
258
266
|
resource_id = _normalize_secret_resource_id(key_name)
|
|
259
|
-
url = f"{resolved_api_base.rstrip('/')}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
260
267
|
|
|
261
268
|
try:
|
|
269
|
+
url = f"{resolved_api_base}/v1/accounts/{resolved_account_id}/secrets/{resource_id}"
|
|
262
270
|
response = requests.delete(url, headers=headers, timeout=30)
|
|
263
271
|
if response.status_code == 200 or response.status_code == 204: # 204 No Content is also success for DELETE
|
|
264
272
|
logger.info(f"Successfully deleted secret '{key_name}'.")
|
|
@@ -7,9 +7,11 @@ import pathlib
|
|
|
7
7
|
import re
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
+
from eval_protocol.common_utils import get_user_agent
|
|
10
11
|
from eval_protocol.directory_utils import find_eval_protocol_dir
|
|
11
12
|
from eval_protocol.models import EvaluationRow
|
|
12
13
|
from eval_protocol.pytest.store_experiment_link import store_experiment_link
|
|
14
|
+
|
|
13
15
|
import requests
|
|
14
16
|
|
|
15
17
|
|
|
@@ -127,10 +129,14 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
|
|
|
127
129
|
)
|
|
128
130
|
continue
|
|
129
131
|
|
|
130
|
-
|
|
132
|
+
api_base = "https://api.fireworks.ai"
|
|
133
|
+
headers = {
|
|
134
|
+
"Authorization": f"Bearer {fireworks_api_key}",
|
|
135
|
+
"Content-Type": "application/json",
|
|
136
|
+
"User-Agent": get_user_agent(),
|
|
137
|
+
}
|
|
131
138
|
|
|
132
139
|
# Make dataset first
|
|
133
|
-
dataset_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets"
|
|
134
140
|
|
|
135
141
|
dataset_payload = { # pyright: ignore[reportUnknownVariableType]
|
|
136
142
|
"dataset": {
|
|
@@ -142,6 +148,7 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
|
|
|
142
148
|
"datasetId": dataset_name,
|
|
143
149
|
}
|
|
144
150
|
|
|
151
|
+
dataset_url = f"{api_base}/v1/accounts/{fireworks_account_id}/datasets"
|
|
145
152
|
dataset_response = requests.post(dataset_url, json=dataset_payload, headers=headers) # pyright: ignore[reportUnknownArgumentType]
|
|
146
153
|
|
|
147
154
|
# Skip if dataset creation failed
|
|
@@ -157,13 +164,13 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
|
|
|
157
164
|
dataset_id = dataset_data.get("datasetId", dataset_name) # pyright: ignore[reportAny]
|
|
158
165
|
|
|
159
166
|
# Upload the JSONL file content
|
|
160
|
-
upload_url = (
|
|
161
|
-
f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload"
|
|
162
|
-
)
|
|
163
|
-
upload_headers = {"Authorization": f"Bearer {fireworks_api_key}"}
|
|
164
|
-
|
|
167
|
+
upload_url = f"{api_base}/v1/accounts/{fireworks_account_id}/datasets/{dataset_id}:upload"
|
|
165
168
|
with open(exp_file, "rb") as f:
|
|
166
169
|
files = {"file": f}
|
|
170
|
+
upload_headers = {
|
|
171
|
+
"Authorization": f"Bearer {fireworks_api_key}",
|
|
172
|
+
"User-Agent": get_user_agent(),
|
|
173
|
+
}
|
|
167
174
|
upload_response = requests.post(upload_url, files=files, headers=upload_headers)
|
|
168
175
|
|
|
169
176
|
# Skip if upload failed
|
|
@@ -176,7 +183,6 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
|
|
|
176
183
|
continue
|
|
177
184
|
|
|
178
185
|
# Create evaluation job (optional - don't skip experiment if this fails)
|
|
179
|
-
eval_job_url = f"https://api.fireworks.ai/v1/accounts/{fireworks_account_id}/evaluationJobs"
|
|
180
186
|
# Truncate job ID to fit 63 character limit
|
|
181
187
|
job_id_base = f"{dataset_name}-job"
|
|
182
188
|
if len(job_id_base) > 63:
|
|
@@ -194,6 +200,7 @@ def handle_persist_flow(all_results: list[list[EvaluationRow]], test_func_name:
|
|
|
194
200
|
},
|
|
195
201
|
}
|
|
196
202
|
|
|
203
|
+
eval_job_url = f"{api_base}/v1/accounts/{fireworks_account_id}/evaluationJobs"
|
|
197
204
|
eval_response = requests.post(eval_job_url, json=eval_job_payload, headers=headers)
|
|
198
205
|
|
|
199
206
|
if eval_response.status_code in [200, 201]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.78
|
|
3
|
+
Version: 0.2.80
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|