eval-protocol 0.2.46.dev2__tar.gz → 0.2.46.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.46.dev2/eval_protocol.egg-info → eval_protocol-0.2.46.dev3}/PKG-INFO +1 -1
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/fireworks_tracing.py +4 -4
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/app.py +22 -1
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/langfuse.py +158 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/LICENSE +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/README.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/upload.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol.egg-info/SOURCES.txt +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/pyproject.toml +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/setup.cfg +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/setup.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_config.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_format.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_length.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_math.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_models.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/versioneer.py +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vite-app/dist/assets/index-C81y9r9l.js +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vite-app/dist/assets/index-C81y9r9l.js.map +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vite-app/dist/assets/index-DpYZaoAr.css +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.46.dev2
|
|
3
|
+
Version: 0.2.46.dev3
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-10T01:
|
|
11
|
+
"date": "2025-10-10T01:45:32-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.46-dev2"
|
|
14
|
+
"full-revisionid": "1757548441eb93afd5dc0428b0218637787cdd80",
|
|
15
|
+
"version": "0.2.46-dev3"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/fireworks_tracing.py
RENAMED
|
@@ -343,11 +343,11 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
343
343
|
# Remove None values
|
|
344
344
|
params = {k: v for k, v in params.items() if v is not None}
|
|
345
345
|
|
|
346
|
-
# Make request to proxy
|
|
346
|
+
# Make request to proxy (using pointwise for efficiency)
|
|
347
347
|
if self.project_id:
|
|
348
|
-
url = f"{self.base_url}/v1/project_id/{self.project_id}/traces"
|
|
348
|
+
url = f"{self.base_url}/v1/project_id/{self.project_id}/traces/pointwise"
|
|
349
349
|
else:
|
|
350
|
-
url = f"{self.base_url}/v1/traces"
|
|
350
|
+
url = f"{self.base_url}/v1/traces/pointwise"
|
|
351
351
|
|
|
352
352
|
headers = {"Authorization": f"Bearer {os.environ.get('FIREWORKS_API_KEY')}"}
|
|
353
353
|
|
|
@@ -367,7 +367,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
367
367
|
except Exception: # In case e.response.json() fails
|
|
368
368
|
error_msg = f"Proxy error: {e.response.text}"
|
|
369
369
|
|
|
370
|
-
logger.error("Failed to fetch traces from proxy: %s", error_msg)
|
|
370
|
+
logger.error("Failed to fetch traces from proxy (HTTP %s): %s", e.response.status_code, error_msg)
|
|
371
371
|
return eval_rows
|
|
372
372
|
except requests.exceptions.RequestException as e:
|
|
373
373
|
# Non-HTTP errors (network issues, timeouts, etc.)
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/app.py
RENAMED
|
@@ -16,7 +16,7 @@ from contextlib import asynccontextmanager
|
|
|
16
16
|
from .models import ProxyConfig, LangfuseTracesResponse, TracesParams, ChatParams, ChatRequestHook, TracesRequestHook
|
|
17
17
|
from .auth import AuthProvider, NoAuthProvider
|
|
18
18
|
from .litellm import handle_chat_completion, proxy_to_litellm
|
|
19
|
-
from .langfuse import fetch_langfuse_traces
|
|
19
|
+
from .langfuse import fetch_langfuse_traces, pointwise_fetch_langfuse_trace
|
|
20
20
|
|
|
21
21
|
# Configure logging before any other imports (so all modules inherit this config)
|
|
22
22
|
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
|
|
@@ -267,6 +267,27 @@ def create_app(
|
|
|
267
267
|
params=params,
|
|
268
268
|
)
|
|
269
269
|
|
|
270
|
+
@app.get("/traces/pointwise", response_model=LangfuseTracesResponse)
|
|
271
|
+
@app.get("/v1/traces/pointwise", response_model=LangfuseTracesResponse)
|
|
272
|
+
@app.get("/project_id/{project_id}/traces/pointwise", response_model=LangfuseTracesResponse)
|
|
273
|
+
@app.get("/v1/project_id/{project_id}/traces/pointwise", response_model=LangfuseTracesResponse)
|
|
274
|
+
async def pointwise_get_langfuse_trace(
|
|
275
|
+
request: Request,
|
|
276
|
+
params: TracesParams = Depends(get_traces_params),
|
|
277
|
+
project_id: Optional[str] = None,
|
|
278
|
+
config: ProxyConfig = Depends(get_config),
|
|
279
|
+
redis_client: redis.Redis = Depends(get_redis),
|
|
280
|
+
_: None = Depends(require_auth),
|
|
281
|
+
) -> LangfuseTracesResponse:
|
|
282
|
+
if project_id is not None:
|
|
283
|
+
params.project_id = project_id
|
|
284
|
+
return await pointwise_fetch_langfuse_trace(
|
|
285
|
+
config=config,
|
|
286
|
+
redis_client=redis_client,
|
|
287
|
+
request=request,
|
|
288
|
+
params=params,
|
|
289
|
+
)
|
|
290
|
+
|
|
270
291
|
# Health
|
|
271
292
|
@app.get("/health")
|
|
272
293
|
async def health():
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/proxy/proxy_core/langfuse.py
RENAMED
|
@@ -366,3 +366,161 @@ async def fetch_langfuse_traces(
|
|
|
366
366
|
raise
|
|
367
367
|
except Exception as e:
|
|
368
368
|
raise HTTPException(status_code=500, detail=f"Error fetching traces from Langfuse: {str(e)}")
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
async def pointwise_fetch_langfuse_trace(
|
|
372
|
+
config: ProxyConfig,
|
|
373
|
+
redis_client: redis.Redis,
|
|
374
|
+
request: Request,
|
|
375
|
+
params: TracesParams,
|
|
376
|
+
):
|
|
377
|
+
"""
|
|
378
|
+
Fetch the latest trace from Langfuse for the specified project.
|
|
379
|
+
|
|
380
|
+
Since insertion_ids are UUID v7 (time-ordered), we only fetch the last one
|
|
381
|
+
as it contains all accumulated information from the pointwise evaluation.
|
|
382
|
+
|
|
383
|
+
Returns a single trace object or raises if not found.
|
|
384
|
+
"""
|
|
385
|
+
|
|
386
|
+
# Preprocess traces request
|
|
387
|
+
if config.preprocess_traces_request:
|
|
388
|
+
params = config.preprocess_traces_request(request, params)
|
|
389
|
+
|
|
390
|
+
tags = params.tags
|
|
391
|
+
project_id = params.project_id
|
|
392
|
+
user_id = params.user_id
|
|
393
|
+
session_id = params.session_id
|
|
394
|
+
name = params.name
|
|
395
|
+
environment = params.environment
|
|
396
|
+
version = params.version
|
|
397
|
+
release = params.release
|
|
398
|
+
fields = params.fields
|
|
399
|
+
hours_back = params.hours_back
|
|
400
|
+
from_timestamp = params.from_timestamp
|
|
401
|
+
to_timestamp = params.to_timestamp
|
|
402
|
+
sleep_between_gets = params.sleep_between_gets
|
|
403
|
+
max_retries = params.max_retries
|
|
404
|
+
|
|
405
|
+
# Use default project if not specified
|
|
406
|
+
if project_id is None:
|
|
407
|
+
project_id = config.default_project_id
|
|
408
|
+
|
|
409
|
+
# Validate project_id
|
|
410
|
+
if project_id not in config.langfuse_keys:
|
|
411
|
+
raise HTTPException(
|
|
412
|
+
status_code=404,
|
|
413
|
+
detail=f"Project ID '{project_id}' not found. Available projects: {list(config.langfuse_keys.keys())}",
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Extract rollout_id from tags for Redis lookup
|
|
417
|
+
rollout_id = _extract_tag_value(tags, "rollout_id:")
|
|
418
|
+
|
|
419
|
+
try:
|
|
420
|
+
# Import the Langfuse adapter
|
|
421
|
+
from langfuse import Langfuse
|
|
422
|
+
|
|
423
|
+
# Create Langfuse client with the project's keys
|
|
424
|
+
logger.debug(f"Connecting to Langfuse at {config.langfuse_host} for project '{project_id}'")
|
|
425
|
+
langfuse_client = Langfuse(
|
|
426
|
+
public_key=config.langfuse_keys[project_id]["public_key"],
|
|
427
|
+
secret_key=config.langfuse_keys[project_id]["secret_key"],
|
|
428
|
+
host=config.langfuse_host,
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
# Parse datetime strings if provided
|
|
432
|
+
from_ts = None
|
|
433
|
+
to_ts = None
|
|
434
|
+
if from_timestamp:
|
|
435
|
+
from_ts = datetime.fromisoformat(from_timestamp.replace("Z", "+00:00"))
|
|
436
|
+
if to_timestamp:
|
|
437
|
+
to_ts = datetime.fromisoformat(to_timestamp.replace("Z", "+00:00"))
|
|
438
|
+
|
|
439
|
+
# Determine time window: explicit from/to takes precedence over hours_back
|
|
440
|
+
if from_ts is None and to_ts is None and hours_back:
|
|
441
|
+
to_ts = datetime.now()
|
|
442
|
+
from_ts = to_ts - timedelta(hours=hours_back)
|
|
443
|
+
|
|
444
|
+
# Get expected insertion_ids from Redis for completeness checking
|
|
445
|
+
expected_ids: Set[str] = set()
|
|
446
|
+
if rollout_id:
|
|
447
|
+
expected_ids = get_insertion_ids(redis_client, rollout_id)
|
|
448
|
+
logger.info(f"Pointwise fetch for rollout_id '{rollout_id}', expecting {len(expected_ids)} insertion_ids")
|
|
449
|
+
if not expected_ids:
|
|
450
|
+
logger.warning(
|
|
451
|
+
f"No expected insertion_ids found in Redis for rollout '{rollout_id}'. Returning empty trace."
|
|
452
|
+
)
|
|
453
|
+
raise HTTPException(
|
|
454
|
+
status_code=500,
|
|
455
|
+
detail=f"No expected insertion_ids found in Redis for rollout '{rollout_id}'. Returning empty trace.",
|
|
456
|
+
)
|
|
457
|
+
|
|
458
|
+
# Get the latest (last) insertion_id since UUID v7 is time-ordered
|
|
459
|
+
latest_insertion_id = max(expected_ids) # UUID v7 max = newest
|
|
460
|
+
logger.info(f"Targeting latest insertion_id (last5): {latest_insertion_id[-5:]} for rollout '{rollout_id}'")
|
|
461
|
+
|
|
462
|
+
for retry in range(max_retries):
|
|
463
|
+
# Fetch trace list targeting the latest insertion_id
|
|
464
|
+
traces = await _fetch_trace_list_with_retry(
|
|
465
|
+
langfuse_client,
|
|
466
|
+
page=1,
|
|
467
|
+
limit=1, # Only need the one trace
|
|
468
|
+
tags=[f"insertion_id:{latest_insertion_id}"],
|
|
469
|
+
user_id=user_id,
|
|
470
|
+
session_id=session_id,
|
|
471
|
+
name=name,
|
|
472
|
+
environment=environment,
|
|
473
|
+
version=version,
|
|
474
|
+
release=release,
|
|
475
|
+
fields=fields,
|
|
476
|
+
from_ts=from_ts,
|
|
477
|
+
to_ts=to_ts,
|
|
478
|
+
max_retries=max_retries,
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
if traces and traces.data:
|
|
482
|
+
# Get the trace info
|
|
483
|
+
trace_info = traces.data[0]
|
|
484
|
+
logger.debug(f"Found trace {trace_info.id} for latest insertion_id {latest_insertion_id[-5:]}")
|
|
485
|
+
|
|
486
|
+
# Fetch full trace details
|
|
487
|
+
trace_full = await _fetch_trace_detail_with_retry(
|
|
488
|
+
langfuse_client,
|
|
489
|
+
trace_info.id,
|
|
490
|
+
max_retries,
|
|
491
|
+
)
|
|
492
|
+
|
|
493
|
+
if trace_full:
|
|
494
|
+
trace_dict = _serialize_trace_to_dict(trace_full)
|
|
495
|
+
logger.info(
|
|
496
|
+
f"Successfully fetched latest trace for rollout '{rollout_id}', insertion_id (last5): {latest_insertion_id[-5:]}"
|
|
497
|
+
)
|
|
498
|
+
return LangfuseTracesResponse(
|
|
499
|
+
project_id=project_id,
|
|
500
|
+
total_traces=1,
|
|
501
|
+
traces=[TraceResponse(**trace_dict)],
|
|
502
|
+
)
|
|
503
|
+
|
|
504
|
+
# If not successful and not last retry, sleep and continue
|
|
505
|
+
if retry < max_retries - 1:
|
|
506
|
+
wait_time = 2**retry
|
|
507
|
+
logger.info(
|
|
508
|
+
f"Pointwise fetch attempt {retry + 1}/{max_retries} failed for rollout '{rollout_id}', insertion_id (last5): {latest_insertion_id[-5:]}. Retrying in {wait_time}s..."
|
|
509
|
+
)
|
|
510
|
+
await asyncio.sleep(wait_time)
|
|
511
|
+
|
|
512
|
+
# After all retries failed
|
|
513
|
+
logger.error(
|
|
514
|
+
f"Failed to fetch latest trace for rollout '{rollout_id}', insertion_id (last5): {latest_insertion_id[-5:]} after {max_retries} retries"
|
|
515
|
+
)
|
|
516
|
+
raise HTTPException(
|
|
517
|
+
status_code=404,
|
|
518
|
+
detail=f"Failed to fetch latest trace for rollout '{rollout_id}' after {max_retries} retries",
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
except ImportError:
|
|
522
|
+
raise HTTPException(status_code=500, detail="Langfuse SDK not installed. Install with: pip install langfuse")
|
|
523
|
+
except HTTPException:
|
|
524
|
+
raise
|
|
525
|
+
except Exception as e:
|
|
526
|
+
raise HTTPException(status_code=500, detail=f"Error fetching latest trace from Langfuse: {str(e)}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.46.dev2
|
|
3
|
+
Version: 0.2.46.dev3
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/normalize_sandbox_fusion.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/utils/generate_api_key.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/development/utils/subprocess_manager.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/braintrust.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/huggingface.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/adapters/openai_responses.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resource_pool.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/resources/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/agent/tool_registry.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_aime25.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_frozen_lake.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/benchmarks/test_gpqa.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/agent_eval_cmd.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/common.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/deploy.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/deploy_mcp.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/preview.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/run_eval_cmd.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/cli_commands/upload.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/data_loader/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/dataset_logger/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/event_bus.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/event_bus/sqlite_event_bus.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/generation/clients/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/human_id/dictionary.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/deepeval.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/integrations/openeval.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/log_utils/rollout_id_filter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/client/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/client/connection.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/base_policy.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/manager.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/execution/policy.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/mcp_multi_client.py
RENAMED
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/process_manager.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/session/__init__.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/session/manager.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/simple_process_manager.py
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp/simulation_server.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/README.md
RENAMED
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{eval_protocol-0.2.46.dev2 → eval_protocol-0.2.46.dev3}/eval_protocol/mcp_servers/tau2/server.py
RENAMED
|
File without changes
|