eval-protocol 0.3.24__tar.gz → 0.3.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.3.24/eval_protocol.egg-info → eval_protocol-0.3.26}/PKG-INFO +1 -1
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/evaluation_test.py +16 -10
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/evaluation_test_utils.py +18 -2
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/exception_config.py +4 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/remote_rollout_processor.py +3 -2
- {eval_protocol-0.3.24 → eval_protocol-0.3.26/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/LICENSE +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/README.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/development/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/development/utils/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/dataframe.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/fireworks_tracing.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/create_rft.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/export_docs.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/local_test.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/cli_commands/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/config.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/exceptions.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/fireworks_rft.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/fireworks_v1_completions_client.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/openai_rft.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/tinker_cookbook.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/models.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/buffer.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/priority_scheduler.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/tracing_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/training/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/training/gepa_trainer.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/training/gepa_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/training/trainer.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/training/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/pyproject.toml +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/setup.cfg +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/setup.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_auth.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_cli_create_rft.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_cli_local_test.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_cli_startup_benchmark.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_config.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_exception_config.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_exceptions.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_fireworks_v1_completions_client.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_format.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_human_id.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_length.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_litellm_policy_provider_fields.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_math.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_message_field_filtering.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_minimal.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_models.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_no_implicit_dotenv.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_openai_rft_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_packaging.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_priority_scheduler.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_readiness.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_repetition.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_rollout_logprobs.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_sqlite_hardening.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_status_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_training_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/versioneer.py +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vite-app/dist/assets/index-DFeF7AG_.js +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vite-app/dist/assets/index-DFeF7AG_.js.map +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vite-app/dist/assets/index-DvKW7FQL.css +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.3.24 → eval_protocol-0.3.26}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.24
|
|
3
|
+
Version: 0.3.26
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-03-
|
|
11
|
+
"date": "2026-03-22T23:53:03-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.3.24"
|
|
14
|
+
"full-revisionid": "b0cbc2584647484c26662944ba59bd137f42a33f",
|
|
15
|
+
"version": "0.3.26"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -211,7 +211,7 @@ def evaluation_test(
|
|
|
211
211
|
completion_params = parse_ep_completion_params_overwrite(completion_params)
|
|
212
212
|
original_completion_params = completion_params
|
|
213
213
|
passed_threshold = parse_ep_passed_threshold(passed_threshold)
|
|
214
|
-
data_loaders = parse_ep_dataloaders(data_loaders)
|
|
214
|
+
data_loaders = parse_ep_dataloaders(data_loaders, dataset_adapter=dataset_adapter)
|
|
215
215
|
custom_invocation_id = os.environ.get("EP_INVOCATION_ID", None)
|
|
216
216
|
|
|
217
217
|
# ignore other data input params when dataloader is provided
|
|
@@ -449,6 +449,8 @@ def evaluation_test(
|
|
|
449
449
|
finally:
|
|
450
450
|
if output_buffer:
|
|
451
451
|
await output_buffer.close()
|
|
452
|
+
await rollout_processor.acleanup()
|
|
453
|
+
rollout_processor.cleanup()
|
|
452
454
|
|
|
453
455
|
for res in priority_results:
|
|
454
456
|
run_idx = (res.execution_metadata.extra or {}).get("run_index", 0)
|
|
@@ -697,15 +699,19 @@ def evaluation_test(
|
|
|
697
699
|
# Lazy import (cached after first import above)
|
|
698
700
|
from eval_protocol.pytest.default_mcp_gym_rollout_processor import MCPGymRolloutProcessor
|
|
699
701
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
702
|
+
try:
|
|
703
|
+
if isinstance(rollout_processor, MCPGymRolloutProcessor):
|
|
704
|
+
# For MCPGymRolloutProcessor, create and execute tasks one at a time to avoid port conflicts
|
|
705
|
+
for run_idx in range(num_runs):
|
|
706
|
+
task = asyncio.create_task(execute_run(run_idx, config))
|
|
707
|
+
await task
|
|
708
|
+
else:
|
|
709
|
+
# For other processors, create all tasks at once and run in parallel
|
|
710
|
+
# Concurrency is now controlled by the shared semaphore in each rollout processor
|
|
711
|
+
await run_tasks_with_run_progress(execute_run, num_runs, config)
|
|
712
|
+
finally:
|
|
713
|
+
await rollout_processor.acleanup()
|
|
714
|
+
rollout_processor.cleanup()
|
|
709
715
|
|
|
710
716
|
experiment_duration_seconds = time.perf_counter() - experiment_start_time
|
|
711
717
|
|
|
@@ -21,6 +21,7 @@ from eval_protocol.models import (
|
|
|
21
21
|
EvaluationThresholdDict,
|
|
22
22
|
Status,
|
|
23
23
|
)
|
|
24
|
+
from eval_protocol.common_utils import load_jsonl
|
|
24
25
|
from eval_protocol.data_loader import DynamicDataLoader
|
|
25
26
|
from eval_protocol.data_loader.models import EvaluationDataLoader
|
|
26
27
|
from eval_protocol.pytest.rollout_processor import RolloutProcessor
|
|
@@ -288,10 +289,21 @@ def _rows_from_jsonl(path: str) -> list[EvaluationRow]:
|
|
|
288
289
|
|
|
289
290
|
def parse_ep_dataloaders(
|
|
290
291
|
dataloaders: Sequence[EvaluationDataLoader] | EvaluationDataLoader | None,
|
|
292
|
+
*,
|
|
293
|
+
dataset_adapter: Callable[[list[dict[str, Any]]], list[EvaluationRow]] | None = None,
|
|
291
294
|
) -> Sequence[EvaluationDataLoader] | EvaluationDataLoader | None:
|
|
295
|
+
"""When ``EP_JSONL_PATH`` is set, load JSONL as raw dicts and run ``dataset_adapter`` if provided.
|
|
296
|
+
|
|
297
|
+
Without ``dataset_adapter``, rows are built with ``EvaluationRow(**dict)`` (legacy behavior),
|
|
298
|
+
which skips custom label fields that adapters normally attach.
|
|
299
|
+
"""
|
|
292
300
|
try:
|
|
293
301
|
load_from_jsonl_path = os.getenv("EP_JSONL_PATH")
|
|
294
302
|
if load_from_jsonl_path:
|
|
303
|
+
if dataset_adapter is not None:
|
|
304
|
+
return DynamicDataLoader(
|
|
305
|
+
generators=[lambda path=load_from_jsonl_path, da=dataset_adapter: da(load_jsonl(path))]
|
|
306
|
+
)
|
|
295
307
|
return DynamicDataLoader(generators=[lambda path=load_from_jsonl_path: _rows_from_jsonl(path)])
|
|
296
308
|
except Exception:
|
|
297
309
|
pass
|
|
@@ -476,8 +488,12 @@ async def rollout_processor_with_retry(
|
|
|
476
488
|
yield result
|
|
477
489
|
|
|
478
490
|
finally:
|
|
479
|
-
|
|
480
|
-
|
|
491
|
+
# Cleanup is intentionally NOT called here. rollout_processor_with_retry
|
|
492
|
+
# is invoked per-run, but the processor (and its session) is shared
|
|
493
|
+
# across parallel runs. Closing per-run would kill in-flight requests
|
|
494
|
+
# in other runs. Cleanup is called once after all runs complete in
|
|
495
|
+
# evaluation_test.py.
|
|
496
|
+
pass
|
|
481
497
|
|
|
482
498
|
|
|
483
499
|
def sanitize_filename(text: str) -> str:
|
|
@@ -23,6 +23,7 @@ def get_default_retryable_exceptions() -> Set[Type[Exception]]:
|
|
|
23
23
|
return _default_retryable_exceptions
|
|
24
24
|
|
|
25
25
|
# Lazy imports (these are expensive)
|
|
26
|
+
import aiohttp
|
|
26
27
|
import httpx
|
|
27
28
|
import litellm
|
|
28
29
|
import requests
|
|
@@ -32,6 +33,9 @@ def get_default_retryable_exceptions() -> Set[Type[Exception]]:
|
|
|
32
33
|
ConnectionError, # type: ignore[assignment]
|
|
33
34
|
TimeoutError, # type: ignore[assignment]
|
|
34
35
|
OSError, # type: ignore[assignment] # Covers network-related OS errors
|
|
36
|
+
# aiohttp library exceptions
|
|
37
|
+
aiohttp.ClientConnectionError,
|
|
38
|
+
aiohttp.ServerDisconnectedError,
|
|
35
39
|
# Requests library exceptions
|
|
36
40
|
requests.exceptions.ConnectionError,
|
|
37
41
|
requests.exceptions.Timeout,
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
@@ -104,6 +104,9 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
104
104
|
try:
|
|
105
105
|
session = self._get_or_create_session()
|
|
106
106
|
async with session.post(init_url, json=init_payload.model_dump(), timeout=timeout_init) as resp:
|
|
107
|
+
if resp.status >= 500:
|
|
108
|
+
body = await resp.text()
|
|
109
|
+
raise ConnectionError(f"Remote /init returned server error (HTTP {resp.status}): {body}")
|
|
107
110
|
if resp.status >= 400:
|
|
108
111
|
body = await resp.text()
|
|
109
112
|
raise RuntimeError(f"Remote /init failed (HTTP {resp.status}): {body}")
|
|
@@ -215,8 +218,6 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
215
218
|
loop = asyncio.get_running_loop()
|
|
216
219
|
loop.create_task(self._session.close())
|
|
217
220
|
except RuntimeError:
|
|
218
|
-
# No running event loop - can't safely close the session.
|
|
219
|
-
# The session will be garbage collected eventually, but warn about it.
|
|
220
221
|
logger.warning(
|
|
221
222
|
"RemoteRolloutProcessor.cleanup() called outside of async context. "
|
|
222
223
|
"Session may not be properly closed. Use `await processor.acleanup()` when possible."
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.24
|
|
3
|
+
Version: 0.3.26
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/math_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_envs/posting_api.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/bfcl_sim_api_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/docker_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/filesystem_resource.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/agent/resources/python_state_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/data/airline_dataset.jsonl
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/data/retail_dataset.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_tau_bench_airline.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/benchmarks/test_tau_bench_retail.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/dynamic_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/factory_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/inline_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/data_loader/jsonl_data_loader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/dataset_logger/dataset_logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/event_bus/sqlite_event_bus_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/integrations/tinker_rollout_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/elasticsearch_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/log_utils/elasticsearch_index_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/orchestration/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_agent/orchestration/base_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/frozen_lake/server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.24 → eval_protocol-0.3.26}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|