eval-protocol 0.2.40__tar.gz → 0.2.41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.40/eval_protocol.egg-info → eval_protocol-0.2.41}/PKG-INFO +1 -1
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/fireworks_tracing.py +51 -13
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/remote_rollout_processor.py +5 -2
- {eval_protocol-0.2.40 → eval_protocol-0.2.41/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/LICENSE +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/README.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/development/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/pyproject.toml +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/setup.cfg +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/setup.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_config.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_format.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_length.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_math.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_models.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/versioneer.py +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vite-app/dist/assets/index-D3tKqxWU.js +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vite-app/dist/assets/index-D3tKqxWU.js.map +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.40 → eval_protocol-0.2.41}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.40
|
|
3
|
+
Version: 0.2.41
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-07T15:43:37-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.40"
|
|
14
|
+
"full-revisionid": "289abc56fc56935b45b11da011712fe48d956af1",
|
|
15
|
+
"version": "0.2.41"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -7,6 +7,7 @@ to pull data from Langfuse deployments with simplified retry logic handling.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
import logging
|
|
9
9
|
import requests
|
|
10
|
+
import time
|
|
10
11
|
from datetime import datetime
|
|
11
12
|
from typing import Any, Dict, List, Optional, Protocol
|
|
12
13
|
|
|
@@ -280,8 +281,9 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
280
281
|
from_timestamp: Optional[datetime] = None,
|
|
281
282
|
to_timestamp: Optional[datetime] = None,
|
|
282
283
|
include_tool_calls: bool = True,
|
|
283
|
-
|
|
284
|
-
|
|
284
|
+
backend_sleep_between_gets: float = 0.1,
|
|
285
|
+
backend_max_retries: int = 3,
|
|
286
|
+
proxy_max_retries: int = 3,
|
|
285
287
|
span_name: Optional[str] = None,
|
|
286
288
|
converter: Optional[TraceDictConverter] = None,
|
|
287
289
|
) -> List[EvaluationRow]:
|
|
@@ -303,8 +305,9 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
303
305
|
from_timestamp: Explicit start time (ISO format)
|
|
304
306
|
to_timestamp: Explicit end time (ISO format)
|
|
305
307
|
include_tool_calls: Whether to include tool calling traces
|
|
306
|
-
|
|
307
|
-
|
|
308
|
+
backend_sleep_between_gets: Sleep time between backend trace fetches (passed to proxy)
|
|
309
|
+
backend_max_retries: Maximum retries for backend operations (passed to proxy)
|
|
310
|
+
proxy_max_retries: Maximum retries when proxy returns 404 (client-side retries with exponential backoff)
|
|
308
311
|
span_name: If provided, extract messages from generations within this named span
|
|
309
312
|
converter: Optional custom converter implementing TraceDictConverter protocol.
|
|
310
313
|
If provided, this will be used instead of the default conversion logic.
|
|
@@ -336,25 +339,60 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
336
339
|
"hours_back": hours_back,
|
|
337
340
|
"from_timestamp": from_timestamp.isoformat() if from_timestamp else None,
|
|
338
341
|
"to_timestamp": to_timestamp.isoformat() if to_timestamp else None,
|
|
339
|
-
"sleep_between_gets":
|
|
340
|
-
"max_retries":
|
|
342
|
+
"sleep_between_gets": backend_sleep_between_gets,
|
|
343
|
+
"max_retries": backend_max_retries,
|
|
341
344
|
}
|
|
342
345
|
|
|
343
346
|
# Remove None values
|
|
344
347
|
params = {k: v for k, v in params.items() if v is not None}
|
|
345
348
|
|
|
346
|
-
# Make request to proxy
|
|
349
|
+
# Make request to proxy with retry logic
|
|
347
350
|
if self.project_id:
|
|
348
351
|
url = f"{self.base_url}/v1/project_id/{self.project_id}/traces"
|
|
349
352
|
else:
|
|
350
353
|
url = f"{self.base_url}/v1/traces"
|
|
351
354
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
355
|
+
# Retry loop for handling backend indexing delays (proxy returns 404)
|
|
356
|
+
result = None
|
|
357
|
+
for attempt in range(proxy_max_retries):
|
|
358
|
+
try:
|
|
359
|
+
response = requests.get(url, params=params, timeout=self.timeout)
|
|
360
|
+
response.raise_for_status()
|
|
361
|
+
result = response.json()
|
|
362
|
+
break # Success, exit retry loop
|
|
363
|
+
except requests.exceptions.HTTPError as e:
|
|
364
|
+
error_msg = str(e)
|
|
365
|
+
should_retry = False
|
|
366
|
+
|
|
367
|
+
# Try to extract detail message from response
|
|
368
|
+
if e.response is not None:
|
|
369
|
+
try:
|
|
370
|
+
error_detail = e.response.json().get("detail", "")
|
|
371
|
+
error_msg = error_detail or e.response.text
|
|
372
|
+
|
|
373
|
+
# Retry on 404 if it's due to incomplete/missing traces (backend still indexing)
|
|
374
|
+
if e.response.status_code == 404 and (
|
|
375
|
+
"Incomplete traces" in error_detail or "No traces found" in error_detail
|
|
376
|
+
):
|
|
377
|
+
should_retry = True
|
|
378
|
+
except Exception:
|
|
379
|
+
error_msg = e.response.text
|
|
380
|
+
|
|
381
|
+
if should_retry and attempt < proxy_max_retries - 1:
|
|
382
|
+
sleep_time = 2 ** (attempt + 1)
|
|
383
|
+
logger.warning(error_msg)
|
|
384
|
+
time.sleep(sleep_time)
|
|
385
|
+
else:
|
|
386
|
+
# Final retry or non-retryable error
|
|
387
|
+
logger.error("Failed to fetch traces from proxy: %s", error_msg)
|
|
388
|
+
return eval_rows
|
|
389
|
+
except requests.exceptions.RequestException as e:
|
|
390
|
+
# Non-HTTP errors (network issues, timeouts, etc.)
|
|
391
|
+
logger.error("Failed to fetch traces from proxy: %s", str(e))
|
|
392
|
+
return eval_rows
|
|
393
|
+
|
|
394
|
+
if result is None:
|
|
395
|
+
logger.error("Failed to fetch traces after %d retries", proxy_max_retries)
|
|
358
396
|
return eval_rows
|
|
359
397
|
|
|
360
398
|
# Extract traces from response
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
@@ -58,7 +58,7 @@ def _default_output_data_loader(config: DataLoaderConfig) -> DynamicDataLoader:
|
|
|
58
58
|
def fetch_traces() -> List[EvaluationRow]:
|
|
59
59
|
base_url = config.model_base_url or "https://tracing.fireworks.ai"
|
|
60
60
|
adapter = FireworksTracingAdapter(base_url=base_url)
|
|
61
|
-
return adapter.get_evaluation_rows(tags=[f"rollout_id:{config.rollout_id}"],
|
|
61
|
+
return adapter.get_evaluation_rows(tags=[f"rollout_id:{config.rollout_id}"], proxy_max_retries=5)
|
|
62
62
|
|
|
63
63
|
return DynamicDataLoader(generators=[fetch_traces], preprocess_fn=filter_longest_conversation)
|
|
64
64
|
|
|
@@ -188,7 +188,10 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
188
188
|
raise ValueError("Rollout ID is required in RemoteRolloutProcessor")
|
|
189
189
|
|
|
190
190
|
final_model_base_url = model_base_url
|
|
191
|
-
if model_base_url and
|
|
191
|
+
if model_base_url and (
|
|
192
|
+
model_base_url.startswith("https://tracing.fireworks.ai")
|
|
193
|
+
or model_base_url.startswith("http://localhost")
|
|
194
|
+
):
|
|
192
195
|
final_model_base_url = _build_fireworks_tracing_url(model_base_url, meta)
|
|
193
196
|
|
|
194
197
|
init_payload: InitRequest = InitRequest(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.40
|
|
3
|
+
Version: 0.2.41
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/math_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_envs/posting_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/bfcl_sim_api_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/docker_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/filesystem_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/agent/resources/python_state_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/data/airline_dataset.jsonl
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/data/retail_dataset.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_tau_bench_airline.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/benchmarks/test_tau_bench_retail.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/dynamic_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/factory_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/data_loader/inline_data_loader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/dataset_logger/dataset_logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/event_bus/sqlite_event_bus_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/elasticsearch_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/log_utils/elasticsearch_index_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/orchestration/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_agent/orchestration/base_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/frozen_lake/server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/default_dataset_adapter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.40 → eval_protocol-0.2.41}/eval_protocol/pytest/evaluation_test_postprocess.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|