eval-protocol 0.3.28__tar.gz → 0.3.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.3.28/eval_protocol.egg-info → eval_protocol-0.3.30}/PKG-INFO +2 -1
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/fireworks_tracing.py +47 -1
- eval_protocol-0.3.30/eval_protocol/adapters/lp_deserializer.py +109 -0
- eval_protocol-0.3.30/eval_protocol/adapters/r3_deserializer.py +187 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/remote_rollout_processor.py +29 -5
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/tracing_utils.py +65 -6
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/types/remote_rollout_processor.py +1 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30/eval_protocol.egg-info}/PKG-INFO +2 -1
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol.egg-info/SOURCES.txt +2 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol.egg-info/requires.txt +1 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/pyproject.toml +1 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/LICENSE +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/README.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/development/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/development/utils/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/dataframe.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/create_rft.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/export_docs.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/local_test.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/cli_commands/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/exceptions.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/fireworks_rft.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/fireworks_v1_completions_client.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/openai_rft.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/tinker_cookbook.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/tinker_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/models.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/app.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/buffer.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/openenv_rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/priority_scheduler.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/training/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/training/gepa_trainer.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/training/gepa_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/training/trainer.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/training/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/setup.cfg +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/setup.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_auth.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_cli_create_rft.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_cli_local_test.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_cli_startup_benchmark.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_exception_config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_exceptions.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_fireworks_v1_completions_client.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_format.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_human_id.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_length.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_litellm_policy_provider_fields.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_math.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_message_field_filtering.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_minimal.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_models.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_no_implicit_dotenv.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_openai_rft_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_packaging.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_priority_scheduler.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_readiness.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_repetition.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_rollout_logprobs.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_sqlite_hardening.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_status_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_training_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/versioneer.py +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vite-app/dist/assets/index-DFeF7AG_.js +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vite-app/dist/assets/index-DFeF7AG_.js.map +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vite-app/dist/assets/index-DvKW7FQL.css +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.3.28 → eval_protocol-0.3.30}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.28
|
|
3
|
+
Version: 0.3.30
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,6 +40,7 @@ Requires-Dist: addict>=2.4.0
|
|
|
40
40
|
Requires-Dist: deepdiff>=6.0.0
|
|
41
41
|
Requires-Dist: websockets>=15.0.1
|
|
42
42
|
Requires-Dist: fastapi>=0.116.1
|
|
43
|
+
Requires-Dist: zstandard>=0.19.0
|
|
43
44
|
Provides-Extra: dev
|
|
44
45
|
Requires-Dist: build; extra == "dev"
|
|
45
46
|
Requires-Dist: twine; extra == "dev"
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2026-
|
|
11
|
+
"date": "2026-05-29T16:09:24-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.3.28"
|
|
14
|
+
"full-revisionid": "1bd5447a3afbca3b71e0f0d205ed7cff6c3afe5d",
|
|
15
|
+
"version": "0.3.30"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -16,6 +16,8 @@ import os
|
|
|
16
16
|
|
|
17
17
|
from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
|
|
18
18
|
from .base import BaseAdapter
|
|
19
|
+
from .lp_deserializer import decompress_and_parse_lp
|
|
20
|
+
from .r3_deserializer import decompress_and_parse_r3
|
|
19
21
|
from .utils import extract_messages_from_data
|
|
20
22
|
from ..common_utils import get_user_agent
|
|
21
23
|
|
|
@@ -100,13 +102,53 @@ def convert_trace_dict_to_evaluation_row(
|
|
|
100
102
|
):
|
|
101
103
|
break # Break early if we've found all the metadata we need
|
|
102
104
|
|
|
105
|
+
# Extract router replay payloads when present
|
|
106
|
+
payloads = trace.get("payloads")
|
|
107
|
+
if isinstance(payloads, dict):
|
|
108
|
+
router_replay = payloads.get("router_replay")
|
|
109
|
+
if isinstance(router_replay, dict) and router_replay.get("data"):
|
|
110
|
+
try:
|
|
111
|
+
matrices, r3_meta = decompress_and_parse_r3(router_replay["data"])
|
|
112
|
+
if execution_metadata.extra is None:
|
|
113
|
+
execution_metadata.extra = {}
|
|
114
|
+
execution_metadata.extra["routing_matrices"] = matrices
|
|
115
|
+
execution_metadata.extra["routing_metadata"] = r3_meta
|
|
116
|
+
except Exception as e:
|
|
117
|
+
logger.warning("Failed to decompress R3 payload for trace %s: %s", trace.get("id"), e)
|
|
118
|
+
|
|
119
|
+
logprobs_payload = payloads.get("logprobs")
|
|
120
|
+
if isinstance(logprobs_payload, dict) and logprobs_payload.get("data"):
|
|
121
|
+
try:
|
|
122
|
+
logprobs, token_ids, lp_meta = decompress_and_parse_lp(logprobs_payload["data"])
|
|
123
|
+
if execution_metadata.extra is None:
|
|
124
|
+
execution_metadata.extra = {}
|
|
125
|
+
execution_metadata.extra["completion_logprobs"] = logprobs
|
|
126
|
+
if token_ids is not None:
|
|
127
|
+
execution_metadata.extra["completion_token_ids"] = token_ids
|
|
128
|
+
execution_metadata.extra["logprobs_metadata"] = lp_meta
|
|
129
|
+
|
|
130
|
+
for i in range(len(messages) - 1, -1, -1):
|
|
131
|
+
if messages[i].role == "assistant":
|
|
132
|
+
content_entries = [{"logprob": lp} for lp in logprobs]
|
|
133
|
+
if token_ids is not None:
|
|
134
|
+
for entry, tid in zip(content_entries, token_ids):
|
|
135
|
+
entry["token_id"] = tid
|
|
136
|
+
messages[i].logprobs = {"content": content_entries}
|
|
137
|
+
break
|
|
138
|
+
except Exception as e:
|
|
139
|
+
logger.warning(
|
|
140
|
+
"Failed to decompress logprobs payload for trace %s: %s",
|
|
141
|
+
trace.get("id"),
|
|
142
|
+
e,
|
|
143
|
+
)
|
|
144
|
+
|
|
103
145
|
return EvaluationRow(
|
|
104
146
|
messages=messages,
|
|
105
147
|
tools=tools,
|
|
106
148
|
input_metadata=InputMetadata(
|
|
107
149
|
row_id=row_id,
|
|
108
150
|
session_data={
|
|
109
|
-
"langfuse_trace_id": trace.get("id"),
|
|
151
|
+
"langfuse_trace_id": trace.get("id"),
|
|
110
152
|
},
|
|
111
153
|
),
|
|
112
154
|
execution_metadata=execution_metadata,
|
|
@@ -426,6 +468,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
426
468
|
max_retries: int = 3,
|
|
427
469
|
span_name: Optional[str] = None,
|
|
428
470
|
converter: Optional[TraceDictConverter] = None,
|
|
471
|
+
include_payloads: bool = False,
|
|
429
472
|
) -> List[EvaluationRow]:
|
|
430
473
|
"""Pull traces from Langfuse via proxy and convert to EvaluationRow format.
|
|
431
474
|
|
|
@@ -449,6 +492,8 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
449
492
|
max_retries: Max retry attempts used by proxy (default: 3)
|
|
450
493
|
converter: Optional custom converter implementing TraceDictConverter protocol.
|
|
451
494
|
If provided, this will be used instead of the default conversion logic.
|
|
495
|
+
include_payloads: If True, request payload data (e.g., router replay)
|
|
496
|
+
from the gateway and decompress it into the returned EvaluationRows.
|
|
452
497
|
|
|
453
498
|
Returns:
|
|
454
499
|
List[EvaluationRow]: Converted evaluation rows
|
|
@@ -479,6 +524,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
479
524
|
"to_timestamp": to_timestamp.isoformat() if to_timestamp else None,
|
|
480
525
|
"sleep_between_gets": sleep_between_gets,
|
|
481
526
|
"max_retries": max_retries,
|
|
527
|
+
"include_payloads": include_payloads if include_payloads else None,
|
|
482
528
|
}
|
|
483
529
|
|
|
484
530
|
# Remove None values
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""LP/v1 binary deserializer for per-token logprobs payloads.
|
|
2
|
+
|
|
3
|
+
Implements the inverse of the tracing gateway's ``logprobs_serializer.serialize_logprobs``.
|
|
4
|
+
See that module for the full header specification.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import struct
|
|
11
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
import zstandard as zstd
|
|
14
|
+
|
|
15
|
+
# Four-byte tag expected at the start of every LP/v1 payload.
MAGIC = b"LP01"
# The only header version this parser accepts.
HEADER_VERSION = 1
# Wire value marking a body entry whose token id is absent.
MISSING_TOKEN_ID = -1
# One body entry: little-endian int32 token id followed by a float32 logprob.
ENTRY_FORMAT = "<if"
ENTRY_SIZE = struct.calcsize(ENTRY_FORMAT)  # 8 bytes
# Header layout (little-endian, no padding): magic(4s), version(B), flags(B),
# reserved(H), token_count(I), body_byte_length(I), reserved(Q).
HEADER_FORMAT = "<4sBBHIIQ"
HEADER_SIZE = struct.calcsize(HEADER_FORMAT)  # 24 bytes
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _parse_header(raw: bytes) -> Dict[str, Any]:
    """Decode and validate the fixed-size LP/v1 header at the start of ``raw``.

    Args:
        raw: Uncompressed LP/v1 buffer; only the first ``HEADER_SIZE`` bytes
            are read here.

    Returns:
        A dict with the keys ``flags``, ``reserved_u16``, ``token_count``,
        ``body_byte_length`` and ``reserved_u64``.

    Raises:
        ValueError: If ``raw`` is shorter than the header, the magic tag does
            not match ``MAGIC``, or the version is not ``HEADER_VERSION``.
    """
    available = len(raw)
    if available < HEADER_SIZE:
        raise ValueError(f"Payload too short for lp/v1 header: {available} < {HEADER_SIZE}")

    fields = struct.unpack(HEADER_FORMAT, raw[:HEADER_SIZE])
    magic, version = fields[0], fields[1]

    if magic != MAGIC:
        raise ValueError(f"Bad LP/v1 magic: {magic!r}")
    if version != HEADER_VERSION:
        raise ValueError(f"Unsupported lp/v1 header version: {version}")

    # Everything after magic/version is handed back to the caller by name.
    names = ("flags", "reserved_u16", "token_count", "body_byte_length", "reserved_u64")
    return dict(zip(names, fields[2:]))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parse_logprobs(raw: bytes) -> Tuple[List[float], Optional[List[int]], Dict[str, Any]]:
    """Parse uncompressed LP/v1 bytes into logprobs, optional token ids, and metadata.

    Args:
        raw: Complete LP/v1 buffer: the header immediately followed by
            ``token_count`` fixed-size body entries.

    Returns:
        ``(logprobs, token_ids, metadata)``. ``token_ids`` is ``None`` when any
        entry carries ``MISSING_TOKEN_ID``. ``metadata`` is the parsed header
        dict extended with ``scope``, ``completion_token_count`` and
        ``all_token_ids_valid``.

    Raises:
        ValueError: If the header is invalid, ``token_count`` is zero, or the
            declared body length disagrees with ``token_count`` or with the
            actual buffer length.
    """
    header = _parse_header(raw)
    token_count = header["token_count"]
    body_byte_length = header["body_byte_length"]

    if token_count == 0:
        raise ValueError("LP/v1 token_count must be > 0")
    if body_byte_length != token_count * ENTRY_SIZE:
        raise ValueError(
            f"body_byte_length ({body_byte_length}) != token_count * {ENTRY_SIZE} "
            f"({token_count * ENTRY_SIZE})"
        )

    expected_len = HEADER_SIZE + body_byte_length
    if len(raw) != expected_len:
        raise ValueError(f"LP/v1 payload length mismatch: {len(raw)} != {expected_len}")

    # The exact-length check above guarantees the body is a whole number of
    # entries, which is what iter_unpack requires. The memoryview avoids
    # copying the body (or each entry) just to decode it.
    logprobs: List[float] = []
    token_ids: List[int] = []
    for wire_id, logprob in struct.iter_unpack(ENTRY_FORMAT, memoryview(raw)[HEADER_SIZE:]):
        token_ids.append(wire_id)
        logprobs.append(logprob)

    # A single missing id invalidates the whole id list for callers.
    all_token_ids_valid = MISSING_TOKEN_ID not in token_ids

    metadata: Dict[str, Any] = {
        "scope": "completion_only",
        "completion_token_count": token_count,
        "all_token_ids_valid": all_token_ids_valid,
    }
    header.update(metadata)
    ids_out: Optional[List[int]] = token_ids if all_token_ids_valid else None
    return logprobs, ids_out, header
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def decompress_and_parse_lp(data_b64: str) -> Tuple[List[float], Optional[List[int]], Dict[str, Any]]:
    """Turn a base64 + zstd wrapped LP/v1 blob into logprobs, token ids and metadata.

    Args:
        data_b64: Base64 text of the zstd-compressed LP binary blob taken from
            ``payloads.logprobs.data``.

    Returns:
        The ``(logprobs, token_ids, metadata)`` tuple produced by
        :func:`parse_logprobs` for the decompressed bytes: one logprob per
        completion token, ``token_ids`` set to ``None`` if any wire id was
        ``MISSING_TOKEN_ID``, and metadata that includes
        ``all_token_ids_valid`` and ``completion_token_count``.
    """
    frame = base64.b64decode(data_b64)
    return parse_logprobs(zstd.ZstdDecompressor().decompress(frame))
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""R3/v1 binary deserializer for router-replay payloads.
|
|
2
|
+
|
|
3
|
+
Implements the inverse of the packed binary format produced by the tracing
|
|
4
|
+
gateway's ``r3_serializer.serialize_r3``. See that module for the full
|
|
5
|
+
header specification.
|
|
6
|
+
|
|
7
|
+
The main entry point is :func:`decompress_and_parse_r3`, which accepts the
|
|
8
|
+
base64-encoded compressed blob returned by the gateway's
|
|
9
|
+
``/v1/traces/pointwise?include_payloads=true`` endpoint and produces
|
|
10
|
+
per-token routing matrices in the same ``List[Optional[str]]`` format used
|
|
11
|
+
by the direct inference path (``DeploymentSampler.sample_with_tokens()``).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import base64
|
|
17
|
+
import struct
|
|
18
|
+
from enum import IntEnum
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
20
|
+
|
|
21
|
+
import zstandard as zstd
|
|
22
|
+
|
|
23
|
+
# Four-byte tag expected at the start of every R3/v1 payload.
MAGIC = b"R3V1"
# Header layout (little-endian, no padding): magic(4s), version(B),
# selector_mode(B), routing_dtype(B), flags(B), total_token_count(I),
# replayed_token_count(I), replay_start_token(I), selector_byte_length(I),
# matrix_byte_length(Q).
HEADER_FORMAT = "<4sBBBBIIIIQ"
HEADER_SIZE = struct.calcsize(HEADER_FORMAT)  # 32 bytes
# Number of token flags packed into each selector bitmap byte.
BITS_PER_BYTE = 8
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class _SelectorMode(IntEnum):
    """How the R3 header says which token positions carry a routing matrix."""

    # Every position in [0, total_token_count) is replayed.
    ALL = 0
    # A contiguous run of replayed_token_count positions from replay_start_token.
    SUFFIX = 1
    # Positions are the set bits of the selector byte section.
    BITMAP = 2
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class _RoutingDtype(IntEnum):
    """Routing dtype code carried in the R3 header.

    In this module the value is only translated to a lower-case name for the
    returned metadata.
    """

    # NOTE(review): presumably the element width of routing-matrix entries;
    # confirm against the gateway's serializer.
    UINT8 = 1
    UINT16 = 2
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Lower-case member names keyed by enum member, used to render header codes as
# readable strings in the metadata returned to callers.
_SELECTOR_MODE_NAMES = {v: v.name.lower() for v in _SelectorMode}
_ROUTING_DTYPE_NAMES = {v: v.name.lower() for v in _RoutingDtype}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _parse_header(raw: bytes) -> Dict[str, Any]:
    """Decode and validate the fixed-size R3/v1 header at the start of ``raw``.

    Args:
        raw: Uncompressed R3 buffer; only the first ``HEADER_SIZE`` bytes are
            read here.

    Returns:
        A dict with the keys ``selector_mode``, ``routing_dtype``, ``flags``,
        ``total_token_count``, ``replayed_token_count``,
        ``replay_start_token``, ``selector_byte_length`` and
        ``matrix_byte_length``.

    Raises:
        ValueError: If ``raw`` is shorter than the header, the magic tag does
            not match ``MAGIC``, or the version is not 1.
    """
    available = len(raw)
    if available < HEADER_SIZE:
        raise ValueError(
            f"Payload too short for r3/v1 header: {available} < {HEADER_SIZE}"
        )

    fields = struct.unpack(HEADER_FORMAT, raw[:HEADER_SIZE])
    magic, version = fields[0], fields[1]

    if magic != MAGIC:
        raise ValueError(f"Bad R3 magic: {magic!r}")
    if version != 1:
        raise ValueError(f"Unsupported R3 header version: {version}")

    # Everything after magic/version is handed back to the caller by name.
    names = (
        "selector_mode",
        "routing_dtype",
        "flags",
        "total_token_count",
        "replayed_token_count",
        "replay_start_token",
        "selector_byte_length",
        "matrix_byte_length",
    )
    return dict(zip(names, fields[2:]))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _read_bitmap_positions(
    selector_bytes: bytes, total_token_count: int
) -> List[int]:
    """List, in ascending order, the token indices whose selector bit is 1.

    Token ``i`` maps to bit ``i % BITS_PER_BYTE`` (counting from the least
    significant bit) of byte ``i // BITS_PER_BYTE``. Indices that fall beyond
    the end of ``selector_bytes`` are treated as unset.
    """
    # Only indices that are both valid tokens and backed by a bitmap byte can
    # ever be selected.
    covered = min(total_token_count, len(selector_bytes) * BITS_PER_BYTE)
    return [
        token_idx
        for token_idx in range(covered)
        if (selector_bytes[token_idx // BITS_PER_BYTE] >> (token_idx % BITS_PER_BYTE)) & 1
    ]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def decompress_and_parse_r3(
    data_b64: str,
) -> Tuple[List[Optional[str]], Dict[str, Any]]:
    """Decompress and unpack an R3/v1 payload into per-token routing matrices.

    Args:
        data_b64: Base64-encoded zstd-compressed R3 binary blob, as returned
            by the tracing gateway in ``payloads.router_replay.data``.

    Returns:
        A tuple of ``(routing_matrices, metadata)`` where:

        - ``routing_matrices`` is a ``List[Optional[str]]`` of length
          ``total_token_count``. Each present position contains a
          base64-encoded routing matrix (matching the format returned by
          the direct inference path); absent positions are ``None``.
        - ``metadata`` is a dict with keys ``routing_dtype``,
          ``selector_mode``, ``total_token_count``, ``replayed_token_count``,
          ``replay_start_token``.

    Raises:
        ValueError: If the header is invalid, the matrix section cannot be
            split evenly across the replayed tokens, the body is shorter than
            the header declares, the selector mode is unknown, a suffix range
            runs past ``total_token_count``, or the selector yields a
            different number of positions than ``replayed_token_count``.
    """
    compressed = base64.b64decode(data_b64)

    # ZstdCompressor.compress() embeds the uncompressed size in the frame
    # header by default, so the library can auto-allocate the output buffer.
    decompressor = zstd.ZstdDecompressor()
    raw = decompressor.decompress(compressed)

    header = _parse_header(raw)

    selector_mode = header["selector_mode"]
    routing_dtype = header["routing_dtype"]
    total_token_count = header["total_token_count"]
    replayed_token_count = header["replayed_token_count"]
    replay_start_token = header["replay_start_token"]
    selector_byte_length = header["selector_byte_length"]
    matrix_byte_length = header["matrix_byte_length"]

    # Unknown codes fall back to their numeric value rendered as a string.
    metadata: Dict[str, Any] = {
        "routing_dtype": _ROUTING_DTYPE_NAMES.get(routing_dtype, str(routing_dtype)),
        "selector_mode": _SELECTOR_MODE_NAMES.get(selector_mode, str(selector_mode)),
        "total_token_count": total_token_count,
        "replayed_token_count": replayed_token_count,
        "replay_start_token": replay_start_token,
    }

    # Nothing was replayed: every position is absent and the body is not read.
    if replayed_token_count == 0:
        return [None] * total_token_count, metadata

    # Per-token matrix byte size is implicit in the payload: all replayed
    # tokens share the same matrix length, so we can recover it from the
    # matrix section total length divided by the replayed-token count.
    if matrix_byte_length % replayed_token_count != 0:
        raise ValueError(
            f"matrix_byte_length ({matrix_byte_length}) is not a multiple of "
            f"replayed_token_count ({replayed_token_count}); cannot split "
            "into per-token matrices"
        )
    matrix_elem_size = matrix_byte_length // replayed_token_count

    body = raw[HEADER_SIZE:]
    expected_body_length = selector_byte_length + matrix_byte_length
    if len(body) < expected_body_length:
        raise ValueError(
            f"Payload body too short for selector and matrix sections: "
            f"{len(body)} < {expected_body_length}"
        )

    selector_bytes = body[:selector_byte_length]
    matrix_bytes = body[selector_byte_length : selector_byte_length + matrix_byte_length]

    if selector_mode == _SelectorMode.ALL:
        replayed_positions = list(range(total_token_count))
    elif selector_mode == _SelectorMode.SUFFIX:
        replay_end = replay_start_token + replayed_token_count
        # Without this check an out-of-range suffix would surface later as a
        # bare IndexError while filling the result list.
        if replay_end > total_token_count:
            raise ValueError(
                f"Suffix selector range [{replay_start_token}, {replay_end}) "
                f"exceeds total_token_count ({total_token_count})"
            )
        replayed_positions = list(range(replay_start_token, replay_end))
    elif selector_mode == _SelectorMode.BITMAP:
        replayed_positions = _read_bitmap_positions(selector_bytes, total_token_count)
    else:
        raise ValueError(f"Unknown selector_mode: {selector_mode}")

    if len(replayed_positions) != replayed_token_count:
        raise ValueError(
            f"Selector produced {len(replayed_positions)} replayed positions, "
            f"but header replayed_token_count is {replayed_token_count}"
        )

    # Split matrix bytes into per-token chunks and base64-encode each one
    matrices: List[Optional[str]] = [None] * total_token_count
    for idx, pos in enumerate(replayed_positions):
        start = idx * matrix_elem_size
        end = start + matrix_elem_size
        matrices[pos] = base64.b64encode(matrix_bytes[start:end]).decode("ascii")

    return matrices, metadata
|
{eval_protocol-0.3.28 → eval_protocol-0.3.30}/eval_protocol/pytest/remote_rollout_processor.py
RENAMED
|
@@ -35,11 +35,13 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
35
35
|
model_base_url: str = "https://tracing.fireworks.ai",
|
|
36
36
|
poll_interval: float = 1.0,
|
|
37
37
|
timeout_seconds: float = 120.0,
|
|
38
|
+
include_payloads: bool = False,
|
|
38
39
|
):
|
|
39
40
|
# Prefer constructor-provided configuration. These can be overridden via
|
|
40
41
|
# config.kwargs at call time for backward compatibility.
|
|
41
42
|
self._remote_base_url = remote_base_url
|
|
42
43
|
self._model_base_url = model_base_url
|
|
44
|
+
self._include_payloads = include_payloads
|
|
43
45
|
if os.getenv("EP_REMOTE_ROLLOUT_PROCESSOR_BASE_URL"):
|
|
44
46
|
self._remote_base_url = os.getenv("EP_REMOTE_ROLLOUT_PROCESSOR_BASE_URL")
|
|
45
47
|
_ep_model_base_url = os.getenv("EP_MODEL_BASE_URL")
|
|
@@ -139,8 +141,28 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
139
141
|
status_code,
|
|
140
142
|
)
|
|
141
143
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
+
# /status only returns the code; backfill message/details/extras from Logs once.
|
|
145
|
+
status_message: str = ""
|
|
146
|
+
status_details: list = []
|
|
147
|
+
status_extras: dict = {}
|
|
148
|
+
completed_logs = await self._tracing_adapter.async_search_logs(
|
|
149
|
+
session, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"]
|
|
150
|
+
)
|
|
151
|
+
# Pick the log row whose status code matches the terminal
|
|
152
|
+
# code from /status, so intermediate RUNNING checkpoints
|
|
153
|
+
# don't poison the backfill.
|
|
154
|
+
for log in completed_logs:
|
|
155
|
+
sd = log.get("status")
|
|
156
|
+
if isinstance(sd, dict) and sd.get("code") == status_code:
|
|
157
|
+
status_message = sd.get("message", "") or ""
|
|
158
|
+
status_details = sd.get("details", []) or []
|
|
159
|
+
raw_extras = log.get("extras") or {}
|
|
160
|
+
status_extras = {
|
|
161
|
+
k: v
|
|
162
|
+
for k, v in raw_extras.items()
|
|
163
|
+
if k not in ("logger_name", "level", "timestamp")
|
|
164
|
+
}
|
|
165
|
+
break
|
|
144
166
|
|
|
145
167
|
exception = exception_for_status_code(status_code, status_message)
|
|
146
168
|
if exception is not None:
|
|
@@ -152,8 +174,7 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
152
174
|
details=status_details,
|
|
153
175
|
)
|
|
154
176
|
|
|
155
|
-
status_extras
|
|
156
|
-
if isinstance(status_extras, dict):
|
|
177
|
+
if status_extras:
|
|
157
178
|
if row.execution_metadata.extra:
|
|
158
179
|
row.execution_metadata.extra.update(status_extras)
|
|
159
180
|
else:
|
|
@@ -175,7 +196,10 @@ class RemoteRolloutProcessor(RolloutProcessor):
|
|
|
175
196
|
row.execution_metadata.rollout_duration_seconds = time.perf_counter() - start_time
|
|
176
197
|
|
|
177
198
|
def _update_with_trace() -> None:
|
|
178
|
-
return update_row_with_remote_trace(
|
|
199
|
+
return update_row_with_remote_trace(
|
|
200
|
+
row, default_fireworks_output_data_loader, model_base_url,
|
|
201
|
+
include_payloads=self._include_payloads,
|
|
202
|
+
)
|
|
179
203
|
|
|
180
204
|
await asyncio.to_thread(_update_with_trace) # Update row with remote trace in-place
|
|
181
205
|
return row
|
|
@@ -22,9 +22,61 @@ def default_fireworks_output_data_loader(config: DataLoaderConfig) -> DynamicDat
|
|
|
22
22
|
# Use EP_REMOTE_API_KEY for fetching remote traces, falling back to FIREWORKS_API_KEY
|
|
23
23
|
api_key = os.environ.get("EP_REMOTE_API_KEY") or os.environ.get("FIREWORKS_API_KEY")
|
|
24
24
|
adapter = FireworksTracingAdapter(base_url=base_url, api_key=api_key)
|
|
25
|
-
return adapter.get_evaluation_rows(
|
|
25
|
+
return adapter.get_evaluation_rows(
|
|
26
|
+
tags=[f"rollout_id:{config.rollout_id}"],
|
|
27
|
+
max_retries=5,
|
|
28
|
+
include_payloads=config.include_payloads,
|
|
29
|
+
)
|
|
26
30
|
|
|
27
|
-
|
|
31
|
+
def preprocess_traces(rows: List[EvaluationRow]) -> List[EvaluationRow]:
|
|
32
|
+
filtered_rows = filter_longest_conversation(rows)
|
|
33
|
+
if config.include_payloads and filtered_rows:
|
|
34
|
+
_merge_payloads_into_longest_row(filtered_rows[0], rows)
|
|
35
|
+
return filtered_rows
|
|
36
|
+
|
|
37
|
+
return DynamicDataLoader(generators=[fetch_traces], preprocess_fn=preprocess_traces)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _merge_payloads_into_longest_row(longest_row: EvaluationRow, rows: List[EvaluationRow]) -> None:
|
|
41
|
+
"""
|
|
42
|
+
Preserve per-turn payload-derived metadata after selecting the longest trace row.
|
|
43
|
+
|
|
44
|
+
Each trace row carries payloads for its final assistant turn. The longest row
|
|
45
|
+
keeps the full conversation, while its top-level execution metadata remains
|
|
46
|
+
the payload metadata for the final completion for backward compatibility.
|
|
47
|
+
"""
|
|
48
|
+
target_assistants = longest_row.get_assistant_messages()
|
|
49
|
+
assistant_turn_payloads = []
|
|
50
|
+
|
|
51
|
+
for row in sorted(rows, key=lambda item: len(item.messages)):
|
|
52
|
+
source = row.last_assistant_message()
|
|
53
|
+
source_turn_index = len(row.get_assistant_messages()) - 1
|
|
54
|
+
if source_turn_index < 0 or source_turn_index >= len(target_assistants):
|
|
55
|
+
continue
|
|
56
|
+
|
|
57
|
+
if source and source.logprobs and not target_assistants[source_turn_index].logprobs:
|
|
58
|
+
target_assistants[source_turn_index].logprobs = source.logprobs
|
|
59
|
+
|
|
60
|
+
extra = row.execution_metadata.extra or {}
|
|
61
|
+
turn_payload = {
|
|
62
|
+
key: extra[key]
|
|
63
|
+
for key in (
|
|
64
|
+
"completion_logprobs",
|
|
65
|
+
"completion_token_ids",
|
|
66
|
+
"logprobs_metadata",
|
|
67
|
+
"routing_matrices",
|
|
68
|
+
"routing_metadata",
|
|
69
|
+
)
|
|
70
|
+
if key in extra
|
|
71
|
+
}
|
|
72
|
+
if turn_payload:
|
|
73
|
+
turn_payload["assistant_turn_index"] = source_turn_index
|
|
74
|
+
assistant_turn_payloads.append(turn_payload)
|
|
75
|
+
|
|
76
|
+
if assistant_turn_payloads:
|
|
77
|
+
if longest_row.execution_metadata.extra is None:
|
|
78
|
+
longest_row.execution_metadata.extra = {}
|
|
79
|
+
longest_row.execution_metadata.extra["assistant_turn_payloads"] = assistant_turn_payloads
|
|
28
80
|
|
|
29
81
|
|
|
30
82
|
def build_fireworks_tracing_url(
|
|
@@ -99,7 +151,7 @@ def build_init_request(
|
|
|
99
151
|
if not completion_params_dict.get("model"):
|
|
100
152
|
raise ValueError("Model must be provided in completion_params")
|
|
101
153
|
|
|
102
|
-
# Extract base_url from completion_params
|
|
154
|
+
# Extract base_url from completion_params for tracing-gateway URL encoding
|
|
103
155
|
completion_params_base_url: Optional[str] = completion_params_dict.get("base_url")
|
|
104
156
|
|
|
105
157
|
# Strip non-OpenAI fields from messages
|
|
@@ -129,7 +181,7 @@ def build_init_request(
|
|
|
129
181
|
|
|
130
182
|
# Build final model base URL with tracing metadata
|
|
131
183
|
final_model_base_url = model_base_url
|
|
132
|
-
if model_base_url and ("tracing.fireworks.ai" in model_base_url or model_base_url.startswith("http://localhost")):
|
|
184
|
+
if model_base_url and ("tracing.fireworks.ai" in model_base_url or model_base_url.startswith("http://localhost") or "litellm-gateway" in model_base_url):
|
|
133
185
|
final_model_base_url = build_fireworks_tracing_url(model_base_url, meta, completion_params_base_url)
|
|
134
186
|
|
|
135
187
|
# Extract API key from environment or completion_params
|
|
@@ -148,13 +200,20 @@ def build_init_request(
|
|
|
148
200
|
|
|
149
201
|
|
|
150
202
|
def update_row_with_remote_trace(
|
|
151
|
-
row: EvaluationRow,
|
|
203
|
+
row: EvaluationRow,
|
|
204
|
+
output_data_loader: Callable[[DataLoaderConfig], DynamicDataLoader],
|
|
205
|
+
model_base_url: str,
|
|
206
|
+
include_payloads: bool = False,
|
|
152
207
|
) -> None:
|
|
153
208
|
"""Update row with remote trace data using output_data_loader (shared logic)."""
|
|
154
209
|
if not row.execution_metadata.rollout_id:
|
|
155
210
|
return None
|
|
156
211
|
|
|
157
|
-
loader_config = DataLoaderConfig(
|
|
212
|
+
loader_config = DataLoaderConfig(
|
|
213
|
+
rollout_id=row.execution_metadata.rollout_id,
|
|
214
|
+
model_base_url=model_base_url,
|
|
215
|
+
include_payloads=include_payloads,
|
|
216
|
+
)
|
|
158
217
|
data_loader = output_data_loader(loader_config)
|
|
159
218
|
results = data_loader.load()
|
|
160
219
|
output_rows: List[EvaluationRow] = [r for result in results for r in result.rows]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.3.28
|
|
3
|
+
Version: 0.3.30
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,6 +40,7 @@ Requires-Dist: addict>=2.4.0
|
|
|
40
40
|
Requires-Dist: deepdiff>=6.0.0
|
|
41
41
|
Requires-Dist: websockets>=15.0.1
|
|
42
42
|
Requires-Dist: fastapi>=0.116.1
|
|
43
|
+
Requires-Dist: zstandard>=0.19.0
|
|
43
44
|
Provides-Extra: dev
|
|
44
45
|
Requires-Dist: build; extra == "dev"
|
|
45
46
|
Requires-Dist: twine; extra == "dev"
|
|
@@ -56,7 +56,9 @@ eval_protocol/adapters/huggingface.py
|
|
|
56
56
|
eval_protocol/adapters/langchain.py
|
|
57
57
|
eval_protocol/adapters/langfuse.py
|
|
58
58
|
eval_protocol/adapters/langsmith.py
|
|
59
|
+
eval_protocol/adapters/lp_deserializer.py
|
|
59
60
|
eval_protocol/adapters/openai_responses.py
|
|
61
|
+
eval_protocol/adapters/r3_deserializer.py
|
|
60
62
|
eval_protocol/adapters/trl.py
|
|
61
63
|
eval_protocol/adapters/utils.py
|
|
62
64
|
eval_protocol/adapters/weave.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|