eval-protocol 0.3.5.dev3__tar.gz → 0.3.7__tar.gz
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {eval_protocol-0.3.5.dev3/eval_protocol.egg-info → eval_protocol-0.3.7}/PKG-INFO +1 -1
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/app.py +5 -1
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/redis_utils.py +9 -2
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/github_action_rollout_processor.py +3 -6
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/remote_rollout_processor.py +3 -6
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/LICENSE +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/README.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/development/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/development/utils/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/dataframe.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/fireworks_tracing.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/create_rft.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/export_docs.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/local_test.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/exceptions.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/fireworks_rft.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/openai_rft.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/tinker_cookbook.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/models.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/buffer.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/priority_scheduler.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/tracing_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/training/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/training/gepa_trainer.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/training/gepa_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/training/trainer.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/training/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/pyproject.toml +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/setup.cfg +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/setup.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_auth.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_cli_create_rft.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_cli_local_test.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_exception_config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_exceptions.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_format.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_human_id.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_length.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_litellm_policy_provider_fields.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_math.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_message_field_filtering.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_minimal.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_models.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_openai_rft_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_packaging.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_priority_scheduler.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_readiness.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_repetition.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_rollout_logprobs.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_sqlite_hardening.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_status_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_training_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/versioneer.py +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vite-app/dist/assets/index-CuQbfdPD.js +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vite-app/dist/assets/index-CuQbfdPD.js.map +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vite-app/dist/assets/index-iZp_HgyW.css +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-01-
|
|
11
|
+
"date": "2026-01-07T01:14:04-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.3.
|
|
14
|
+
"full-revisionid": "77842b531fff8ab0b70d51205a270f326410ba6e",
|
|
15
|
+
"version": "0.3.7"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -4,7 +4,7 @@ A FastAPI service that sits in front of LiteLLM and extracts metadata from URL p
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from fastapi import FastAPI, Depends, Request, Query
|
|
7
|
-
from typing import Optional, List
|
|
7
|
+
from typing import Optional, List, Callable
|
|
8
8
|
import os
|
|
9
9
|
import redis
|
|
10
10
|
import logging
|
|
@@ -105,6 +105,7 @@ def create_app(
|
|
|
105
105
|
auth_provider: AuthProvider = NoAuthProvider(),
|
|
106
106
|
preprocess_chat_request: Optional[ChatRequestHook] = None,
|
|
107
107
|
preprocess_traces_request: Optional[TracesRequestHook] = None,
|
|
108
|
+
extra_routes: Optional[Callable[[FastAPI], None]] = None,
|
|
108
109
|
) -> FastAPI:
|
|
109
110
|
@asynccontextmanager
|
|
110
111
|
async def lifespan(app: FastAPI):
|
|
@@ -288,6 +289,9 @@ def create_app(
|
|
|
288
289
|
params=params,
|
|
289
290
|
)
|
|
290
291
|
|
|
292
|
+
if extra_routes is not None:
|
|
293
|
+
extra_routes(app)
|
|
294
|
+
|
|
291
295
|
# Health
|
|
292
296
|
@app.get("/health")
|
|
293
297
|
async def health():
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/proxy/proxy_core/redis_utils.py
RENAMED
|
@@ -8,8 +8,12 @@ import redis
|
|
|
8
8
|
|
|
9
9
|
logger = logging.getLogger(__name__)
|
|
10
10
|
|
|
11
|
+
DEFAULT_ROLLOUT_TTL_SECONDS = 60 * 60 * 24
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
|
|
14
|
+
def register_insertion_id(
|
|
15
|
+
redis_client: redis.Redis, rollout_id: str, insertion_id: str, ttl_seconds: int = DEFAULT_ROLLOUT_TTL_SECONDS
|
|
16
|
+
) -> bool:
|
|
13
17
|
"""Register an insertion_id for a rollout_id in Redis.
|
|
14
18
|
|
|
15
19
|
Tracks all expected completion insertion_ids for this rollout.
|
|
@@ -22,7 +26,10 @@ def register_insertion_id(redis_client: redis.Redis, rollout_id: str, insertion_
|
|
|
22
26
|
True if successful, False otherwise
|
|
23
27
|
"""
|
|
24
28
|
try:
|
|
25
|
-
redis_client.
|
|
29
|
+
pipe = redis_client.pipeline()
|
|
30
|
+
pipe.sadd(rollout_id, insertion_id)
|
|
31
|
+
pipe.expire(rollout_id, int(ttl_seconds))
|
|
32
|
+
pipe.execute()
|
|
26
33
|
logger.info(f"Registered insertion_id {insertion_id} for rollout {rollout_id}")
|
|
27
34
|
return True
|
|
28
35
|
except Exception as e:
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import os
|
|
3
3
|
import time
|
|
4
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Dict, List, Optional
|
|
5
5
|
import json
|
|
6
6
|
import requests
|
|
7
7
|
from datetime import datetime, timezone, timedelta
|
|
8
8
|
from eval_protocol.models import EvaluationRow, Status
|
|
9
9
|
from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader
|
|
10
|
-
from eval_protocol.types.remote_rollout_processor import DataLoaderConfig
|
|
11
10
|
|
|
12
11
|
from .rollout_processor import RolloutProcessor
|
|
13
12
|
from .types import RolloutProcessorConfig
|
|
@@ -21,7 +20,7 @@ class GithubActionRolloutProcessor(RolloutProcessor):
|
|
|
21
20
|
Expected GitHub Actions workflow:
|
|
22
21
|
- Workflow dispatch with inputs: completion_params, metadata (JSON), model_base_url, api_key
|
|
23
22
|
- Workflow makes API calls that get traced (e.g., via Fireworks tracing proxy)
|
|
24
|
-
- Traces are fetched later via
|
|
23
|
+
- Traces are fetched later via Fireworks tracing proxy using rollout_id tags
|
|
25
24
|
|
|
26
25
|
NOTE: GHA has a rate limit of 5000 requests per hour.
|
|
27
26
|
"""
|
|
@@ -38,7 +37,6 @@ class GithubActionRolloutProcessor(RolloutProcessor):
|
|
|
38
37
|
timeout_seconds: float = 1800.0,
|
|
39
38
|
max_find_workflow_retries: int = 5,
|
|
40
39
|
github_token: Optional[str] = None,
|
|
41
|
-
output_data_loader: Optional[Callable[[DataLoaderConfig], DynamicDataLoader]] = None,
|
|
42
40
|
):
|
|
43
41
|
self.owner = owner
|
|
44
42
|
self.repo = repo
|
|
@@ -52,7 +50,6 @@ class GithubActionRolloutProcessor(RolloutProcessor):
|
|
|
52
50
|
self.timeout_seconds = timeout_seconds
|
|
53
51
|
self.max_find_workflow_retries = max_find_workflow_retries
|
|
54
52
|
self.github_token = github_token
|
|
55
|
-
self._output_data_loader = output_data_loader or default_fireworks_output_data_loader
|
|
56
53
|
|
|
57
54
|
def _headers(self) -> Dict[str, str]:
|
|
58
55
|
headers = {"Accept": "application/vnd.github+json"}
|
|
@@ -200,7 +197,7 @@ class GithubActionRolloutProcessor(RolloutProcessor):
|
|
|
200
197
|
row.execution_metadata.rollout_duration_seconds = time.perf_counter() - start_time
|
|
201
198
|
|
|
202
199
|
def _update_with_trace() -> None:
|
|
203
|
-
return update_row_with_remote_trace(row,
|
|
200
|
+
return update_row_with_remote_trace(row, default_fireworks_output_data_loader, self.model_base_url)
|
|
204
201
|
|
|
205
202
|
await asyncio.to_thread(_update_with_trace)
|
|
206
203
|
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import time
|
|
3
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
4
|
|
|
5
5
|
import requests
|
|
6
6
|
|
|
@@ -26,8 +26,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
26
26
|
"""
|
|
27
27
|
Rollout processor that triggers a remote HTTP server to perform the rollout.
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
You can provide a custom output_data_loader for different tracing backends.
|
|
29
|
+
Fetches traces from the Fireworks tracing proxy using rollout_id tags.
|
|
31
30
|
|
|
32
31
|
See https://evalprotocol.io/tutorial/remote-rollout-processor for documentation.
|
|
33
32
|
"""
|
|
@@ -39,7 +38,6 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
39
38
|
model_base_url: str = "https://tracing.fireworks.ai",
|
|
40
39
|
poll_interval: float = 1.0,
|
|
41
40
|
timeout_seconds: float = 120.0,
|
|
42
|
-
output_data_loader: Optional[Callable[[DataLoaderConfig], DynamicDataLoader]] = None,
|
|
43
41
|
):
|
|
44
42
|
# Prefer constructor-provided configuration. These can be overridden via
|
|
45
43
|
# config.kwargs at call time for backward compatibility.
|
|
@@ -52,7 +50,6 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
52
50
|
self._model_base_url = _ep_model_base_url
|
|
53
51
|
self._poll_interval = poll_interval
|
|
54
52
|
self._timeout_seconds = timeout_seconds
|
|
55
|
-
self._output_data_loader = output_data_loader or default_fireworks_output_data_loader
|
|
56
53
|
self._tracing_adapter = FireworksTracingAdapter(base_url=self._model_base_url)
|
|
57
54
|
|
|
58
55
|
def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]:
|
|
@@ -188,7 +185,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
188
185
|
row.execution_metadata.rollout_duration_seconds = time.perf_counter() - start_time
|
|
189
186
|
|
|
190
187
|
def _update_with_trace() -> None:
|
|
191
|
-
return update_row_with_remote_trace(row,
|
|
188
|
+
return update_row_with_remote_trace(row, default_fireworks_output_data_loader, model_base_url)
|
|
192
189
|
|
|
193
190
|
await asyncio.to_thread(_update_with_trace) # Update row with remote trace in-place
|
|
194
191
|
return row
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.7
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/adapters/fireworks_tracing.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_envs/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/bfcl_envs/math_api.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/docker_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/agent/resources/sql_resource.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/data/airline_dataset.jsonl
RENAMED
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/data/retail_dataset.jsonl
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_frozen_lake.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_tau_bench_airline.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/benchmarks/test_tau_bench_retail.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/cli_commands/agent_eval_cmd.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/dynamic_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/factory_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/inline_data_loader.py
RENAMED
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/data_loader/jsonl_data_loader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/dataset_logger/dataset_logger.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/event_bus/sqlite_event_bus.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/integrations/tinker_cookbook.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/elasticsearch_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/log_utils/rollout_id_filter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp/simple_process_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_agent/orchestration/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.3.5.dev3 → eval_protocol-0.3.7}/eval_protocol/mcp_servers/frozen_lake/server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|