eval-protocol 0.2.6.dev1__tar.gz → 0.2.6.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.6.dev1/eval_protocol.egg-info → eval_protocol-0.2.6.dev2}/PKG-INFO +2 -2
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/_version.py +3 -3
- eval_protocol-0.2.6.dev2/eval_protocol/common_utils.py +55 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/__init__.py +3 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/dataset_logger/dataset_logger.py +2 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +39 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +57 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/__init__.py +5 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/event_bus.py +50 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/logger.py +3 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/sqlite_event_bus.py +109 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/sqlite_event_bus_database.py +95 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/generation/clients.py +4 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/logging_utils.py +1 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/client/connection.py +6 -11
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/base_policy.py +1 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/manager.py +40 -25
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/mcpgym.py +61 -101
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/session/manager.py +1 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_env.py +6 -14
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/models.py +22 -2
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +14 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_single_turn_rollout_process.py +37 -5
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/evaluation_test.py +310 -48
- eval_protocol-0.2.6.dev2/eval_protocol/pytest/plugin.py +144 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/utils.py +33 -1
- eval_protocol-0.2.6.dev2/eval_protocol/stats/__init__.py +5 -0
- eval_protocol-0.2.6.dev2/eval_protocol/stats/confidence_intervals.py +116 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/types/types.py +3 -0
- eval_protocol-0.2.6.dev2/eval_protocol/utils/logs_server.py +338 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2/eval_protocol.egg-info}/PKG-INFO +2 -2
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/SOURCES.txt +15 -4
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/entry_points.txt +3 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/requires.txt +1 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/pyproject.toml +13 -1
- eval_protocol-0.2.6.dev2/tests/test_event_bus.py +265 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_parallel_rollouts.py +2 -2
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_url_handling.py +8 -26
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/agent/llm_agent.py +22 -36
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/user/user_simulator.py +9 -5
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/llm_utils.py +18 -3
- eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-D9iVTBbF.css +1 -0
- eval_protocol-0.2.6.dev1/vite-app/dist/assets/index-BMc_e8JT.js → eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-DiF_B1x_.js +19 -19
- eval_protocol-0.2.6.dev1/vite-app/dist/assets/index-BMc_e8JT.js.map → eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-DiF_B1x_.js.map +1 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vite-app/dist/index.html +3 -3
- eval_protocol-0.2.6.dev1/eval_protocol/common_utils.py +0 -30
- eval_protocol-0.2.6.dev1/eval_protocol/dataset_logger/__init__.py +0 -3
- eval_protocol-0.2.6.dev1/eval_protocol/utils/logs_server.py +0 -299
- eval_protocol-0.2.6.dev1/vite-app/dist/assets/index-Dp7ms4NJ.css +0 -1
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/LICENSE +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/README.md +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/development/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.6.dev1/eval_protocol/dataset_logger → eval_protocol-0.2.6.dev2/eval_protocol}/directory_utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/braintrust.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/session.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/setup.cfg +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/setup.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_braintrust_adapter.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_braintrust_example.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_config.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_format.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_frozen_lake_http_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_frozen_lake_seed_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_length.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_math.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_models.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/versioneer.py +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.6.dev1
|
|
3
|
+
Version: 0.2.6.dev2
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -38,10 +38,10 @@ Requires-Dist: litellm>=1.0.0
|
|
|
38
38
|
Requires-Dist: addict>=2.4.0
|
|
39
39
|
Requires-Dist: deepdiff>=6.0.0
|
|
40
40
|
Requires-Dist: pandas>=1.5.0
|
|
41
|
-
Requires-Dist: watchdog>=2.1.0
|
|
42
41
|
Requires-Dist: websockets>=15.0.1
|
|
43
42
|
Requires-Dist: fastapi>=0.116.1
|
|
44
43
|
Requires-Dist: pytest>=6.0.0
|
|
44
|
+
Requires-Dist: peewee>=3.18.2
|
|
45
45
|
Provides-Extra: dev
|
|
46
46
|
Requires-Dist: build; extra == "dev"
|
|
47
47
|
Requires-Dist: twine; extra == "dev"
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-08-
|
|
11
|
+
"date": "2025-08-10T19:39:17-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.6-dev1"
|
|
14
|
+
"full-revisionid": "a50c3f62fdb5be7347741446338d8c1771e92547",
|
|
15
|
+
"version": "0.2.6-dev2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Captures the value that follows a "row_id" key: either a quoted JSON string
# (escapes allowed) or a bare token such as a number. Used only to enrich error
# messages for lines that failed to parse, so it works on malformed JSON text.
_ROW_ID_PATTERN = re.compile(r'"row_id"\s*:\s*("(?:[^"\\]|\\.)*"|[^,}\s]+)')


def _parse_jsonl_line(stripped: str, line_number: int, source_kind: str, source: str) -> Dict[str, Any]:
    """Parse one non-blank JSONL line, adding location details to any parse failure.

    Args:
        stripped: The line content with surrounding whitespace removed.
        line_number: 1-based line number within the source, used in messages.
        source_kind: Either "URL" or "file"; only used in the printed message.
        source: The URL or path the line came from.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the line is invalid JSON and a ``row_id`` value could be
            located in it; the message includes the line and the row id.
        json.JSONDecodeError: If the line is invalid JSON and no ``row_id``
            value could be located (this is itself a ``ValueError`` subclass).
    """
    try:
        return json.loads(stripped)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON line for {source_kind} {source} at line {line_number}")
        match = _ROW_ID_PATTERN.search(stripped)
        if match is not None:
            # Report the captured value itself, not the Match object's repr.
            row_id = match.group(1)
            raise ValueError(f"{e.msg} at line {line_number}: {stripped} ({row_id})") from e
        raise


def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
    """
    Reads a JSONL source where each non-blank line is a JSON value and returns the parsed values.

    Supports HTTP(S) URLs (streamed with a 30 second timeout) and local file paths.
    Blank or whitespace-only lines are skipped but still count towards line numbers.

    Args:
        file_path: Path to the JSONL file, or an ``http://`` / ``https://`` URL.

    Returns:
        A list with one parsed JSON value per non-blank line, in source order.

    Raises:
        ValueError: If a line is not valid JSON (``json.JSONDecodeError`` is a
            subclass); when the line contains a ``row_id`` the message names it.
        OSError: If a local file cannot be opened.
        requests.RequestException: If the URL cannot be fetched or returns an error status.
    """
    data: List[Dict[str, Any]] = []
    if file_path.startswith(("http://", "https://")):
        # The context manager releases the streamed connection even if parsing fails.
        with requests.get(file_path, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            if resp.encoding is None:
                # Without an encoding iter_lines(decode_unicode=True) yields bytes,
                # which would break the str-based error reporting.
                resp.encoding = "utf-8"
            for line_number, raw in enumerate(resp.iter_lines(decode_unicode=True), start=1):
                stripped = raw.strip() if raw else ""
                if stripped:
                    data.append(_parse_jsonl_line(stripped, line_number, "URL", file_path))
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            for line_number, line in enumerate(f, start=1):
                # Skip entirely blank or whitespace-only lines to be robust to trailing newlines
                stripped = line.strip()
                if stripped:
                    data.append(_parse_jsonl_line(stripped, line_number, "file", file_path))
    return data
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from eval_protocol.dataset_logger.dataset_logger import LOG_EVENT_TYPE, DatasetLogger
|
|
5
|
+
from eval_protocol.dataset_logger.sqlite_evaluation_row_store import SqliteEvaluationRowStore
|
|
6
|
+
from eval_protocol.directory_utils import find_eval_protocol_dir
|
|
7
|
+
from eval_protocol.event_bus import event_bus
|
|
8
|
+
from eval_protocol.event_bus.logger import logger
|
|
9
|
+
from eval_protocol.models import EvaluationRow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SqliteDatasetLoggerAdapter(DatasetLogger):
    """DatasetLogger that persists rows in a SqliteEvaluationRowStore and announces each write on the event bus."""

    def __init__(self, db_path: Optional[str] = None, store: Optional[SqliteEvaluationRowStore] = None):
        """Bind the adapter to an existing store or create one.

        Args:
            db_path: SQLite file to use; defaults to ``logs.db`` inside the
                directory returned by ``find_eval_protocol_dir()``.
            store: A ready-made store to reuse instead of creating one.

        Raises:
            ValueError: If both ``db_path`` and ``store`` are supplied.
        """
        eval_protocol_dir = find_eval_protocol_dir()
        if db_path is not None and store is not None:
            raise ValueError("Provide only one of db_path or store, not both.")

        if store is None:
            self.db_path = os.path.join(eval_protocol_dir, "logs.db") if db_path is None else db_path
            store = SqliteEvaluationRowStore(self.db_path)
        else:
            self.db_path = store.db_path
        self._store = store

    def log(self, row: "EvaluationRow") -> None:
        """Upsert ``row`` under its ``input_metadata.row_id`` and emit a LOG_EVENT_TYPE event.

        Emission failures are logged and swallowed so that they never undo or
        block the storage step, which has already completed by then.
        """
        identifier = row.input_metadata.row_id
        payload = row.model_dump(exclude_none=True, mode="json")
        self._store.upsert_row(row_id=identifier, data=payload)
        try:
            # Re-build the row from the serialized payload before handing it to subscribers.
            event_bus.emit(LOG_EVENT_TYPE, EvaluationRow(**payload))
        except Exception as e:
            # Avoid breaking storage due to event emission issues
            logger.error(f"Failed to emit row_upserted event: {e}")

    def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]:
        """Return stored rows as EvaluationRow objects; all rows when ``row_id`` is None."""
        return [EvaluationRow(**stored) for stored in self._store.read_rows(row_id=row_id)]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from peewee import CharField, Model, SqliteDatabase
|
|
5
|
+
from playhouse.sqlite_ext import JSONField
|
|
6
|
+
|
|
7
|
+
from eval_protocol.models import EvaluationRow
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SqliteEvaluationRowStore:
    """
    Lightweight reusable SQLite store for evaluation rows.

    Stores arbitrary row data as JSON keyed by a unique string `row_id`.
    """

    def __init__(self, db_path: str):
        """Open (creating if needed) the SQLite database at ``db_path`` and ensure the table exists.

        Args:
            db_path: Path of the SQLite file. Its parent directory is created
                when the path has a directory component.
        """
        # os.path.dirname() is "" for a bare file name, and os.makedirs("") raises,
        # so only create the directory when there is one to create.
        parent_dir = os.path.dirname(db_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self._db_path = db_path
        self._db = SqliteDatabase(self._db_path)

        class BaseModel(Model):
            class Meta:
                database = self._db

        # NOTE(review): this local model shadows the imported eval_protocol.models.EvaluationRow
        # inside __init__. peewee derives the default table name from the class name, so
        # renaming it would presumably point at a different table — confirm before renaming.
        class EvaluationRow(BaseModel):  # type: ignore
            row_id = CharField(unique=True)
            data = JSONField()

        self._EvaluationRow = EvaluationRow

        self._db.connect()
        self._db.create_tables([EvaluationRow])

    @property
    def db_path(self) -> str:
        """Path of the SQLite file backing this store."""
        return self._db_path

    def upsert_row(self, row_id: str, data: dict) -> None:
        """Insert ``data`` under ``row_id``, or replace the stored data if the id already exists.

        Uses a single ``INSERT ... ON CONFLICT (row_id) DO UPDATE`` statement so that two
        writers racing on the same ``row_id`` cannot both take the insert path and trip the
        unique constraint. peewee supports this form on SQLite 3.24.0 and newer.
        """
        model = self._EvaluationRow
        (
            model.insert(row_id=row_id, data=data)
            .on_conflict(conflict_target=[model.row_id], preserve=[model.data])
            .execute()
        )

    def read_rows(self, row_id: Optional[str] = None) -> List[dict]:
        """Return the stored JSON payloads; all rows when ``row_id`` is None, else only the matching one."""
        query = self._EvaluationRow.select().dicts()
        if row_id is not None:
            query = query.where(self._EvaluationRow.row_id == row_id)
        return [record["data"] for record in query]

    def delete_row(self, row_id: str) -> int:
        """Delete the row stored under ``row_id`` and return the number of rows removed."""
        return self._EvaluationRow.delete().where(self._EvaluationRow.row_id == row_id).execute()

    def delete_all_rows(self) -> int:
        """Delete every stored row and return the number of rows removed."""
        return self._EvaluationRow.delete().execute()
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from typing import Any, Callable, List
|
|
2
|
+
|
|
3
|
+
from eval_protocol.event_bus.logger import logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EventBus:
    """Core event bus interface for decoupling components in the evaluation system.

    This base class only delivers events to callbacks registered in the same
    object; ``start_listening`` / ``stop_listening`` are no-op hooks that
    subclasses override to add cross-process delivery.
    """

    def __init__(self):
        # Callbacks are invoked in subscription order.
        self._listeners: List[Callable[[str, Any], None]] = []

    def subscribe(self, callback: Callable[[str, Any], None]) -> None:
        """Subscribe to events.

        Args:
            callback: Function that takes (event_type, data) parameters
        """
        self._listeners.append(callback)

    def unsubscribe(self, callback: Callable[[str, Any], None]) -> None:
        """Unsubscribe from events.

        Removing a callback that is not subscribed is a no-op.

        Args:
            callback: The callback function to remove
        """
        try:
            self._listeners.remove(callback)
        except ValueError:
            pass  # Callback wasn't subscribed

    def emit(self, event_type: str, data: Any) -> None:
        """Emit an event to all subscribers.

        A failing listener is logged at debug level and does not prevent the
        remaining listeners from being called.

        Args:
            event_type: Type of event (e.g., "row_upserted")
            data: Event data
        """
        # Iterate over a snapshot: a listener that subscribes or unsubscribes
        # during dispatch would otherwise shift the list under the iterator and
        # cause another listener to be skipped.
        for listener in list(self._listeners):
            try:
                listener(event_type, data)
            except Exception as e:
                logger.debug(f"Event listener failed for {event_type}: {e}")

    def start_listening(self) -> None:
        """Start listening for cross-process events. Override in subclasses."""
        pass

    def stop_listening(self) -> None:
        """Stop listening for cross-process events. Override in subclasses."""
        pass
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from eval_protocol.event_bus.event_bus import EventBus
|
|
7
|
+
from eval_protocol.event_bus.logger import logger
|
|
8
|
+
from eval_protocol.event_bus.sqlite_event_bus_database import SqliteEventBusDatabase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SqliteEventBus(EventBus):
    """SQLite-based event bus implementation that supports cross-process communication.

    ``emit`` notifies local subscribers and then records the event in the
    database tagged with this instance's ``_process_id`` (a fresh UUID per
    instance). After ``start_listening`` a daemon thread polls the database for
    unprocessed events recorded under a different id and dispatches them to the
    local subscribers.
    """

    def __init__(self, db_path: Optional[str] = None):
        """Create the bus.

        Args:
            db_path: SQLite file used to exchange events. Defaults to
                ``logs.db`` inside the directory returned by
                ``find_eval_protocol_dir()``.
        """
        super().__init__()

        # Use the same database as the evaluation row store
        if db_path is None:
            import os

            from eval_protocol.directory_utils import find_eval_protocol_dir

            eval_protocol_dir = find_eval_protocol_dir()
            db_path = os.path.join(eval_protocol_dir, "logs.db")

        self._db = SqliteEventBusDatabase(db_path)
        self._running = False
        self._listener_thread: Optional[threading.Thread] = None
        self._process_id = str(uuid4())

    def emit(self, event_type: str, data: Any) -> None:
        """Emit an event to all subscribers.

        Args:
            event_type: Type of event (e.g., "log")
            data: Event data
        """
        # Call local listeners immediately
        super().emit(event_type, data)

        # Publish to cross-process subscribers
        self._publish_cross_process(event_type, data)

    def _publish_cross_process(self, event_type: str, data: Any) -> None:
        """Publish event to cross-process subscribers via database."""
        self._db.publish_event(event_type, data, self._process_id)

    def start_listening(self) -> None:
        """Start listening for cross-process events. Does nothing if already running."""
        if self._running:
            return

        self._running = True
        self._start_database_listener()

    def stop_listening(self) -> None:
        """Stop listening for cross-process events.

        Signals the polling thread to exit and waits up to one second for it.
        """
        self._running = False
        if self._listener_thread and self._listener_thread.is_alive():
            self._listener_thread.join(timeout=1)

    def _start_database_listener(self) -> None:
        """Start database-based event listener."""

        def database_listener():
            last_cleanup = time.time()

            while self._running:
                try:
                    # Get unprocessed events from other processes
                    events = self._db.get_unprocessed_events(self._process_id)

                    for event in events:
                        if not self._running:
                            break

                        try:
                            # Handle the event
                            self._handle_cross_process_event(event["event_type"], event["data"])

                            # Mark as processed only after handling, so an event whose
                            # handling raised stays unprocessed and is picked up again.
                            self._db.mark_event_processed(event["event_id"])

                        except Exception as e:
                            logger.debug(f"Failed to process event {event['event_id']}: {e}")

                    # Clean up old events every hour
                    current_time = time.time()
                    if current_time - last_cleanup >= 3600:
                        self._db.cleanup_old_events()
                        last_cleanup = current_time

                    # Small sleep to prevent busy waiting
                    time.sleep(0.1)

                except Exception as e:
                    logger.debug(f"Database listener error: {e}")
                    # Back off longer after an unexpected failure.
                    time.sleep(1)

        # Daemon thread: it must not keep the interpreter alive at shutdown.
        self._listener_thread = threading.Thread(target=database_listener, daemon=True)
        self._listener_thread.start()

    def _handle_cross_process_event(self, event_type: str, data: Any) -> None:
        """Handle events received from other processes.

        Listener failures are logged at debug level and do not stop delivery to
        the remaining listeners.
        """
        # This runs on the polling thread while subscribe()/unsubscribe() may be
        # called from other threads; iterate over a snapshot so a concurrent
        # change to the list cannot make the loop skip a listener.
        for listener in list(self._listeners):
            try:
                listener(event_type, data)
            except Exception as e:
                logger.debug(f"Cross-process event listener failed for {event_type}: {e}")
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Any, List
|
|
3
|
+
from uuid import uuid4
|
|
4
|
+
|
|
5
|
+
from peewee import CharField, DateTimeField, Model, SqliteDatabase
|
|
6
|
+
from playhouse.sqlite_ext import JSONField
|
|
7
|
+
|
|
8
|
+
from eval_protocol.event_bus.logger import logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SqliteEventBusDatabase:
    """SQLite-backed table of events used to pass messages between processes."""

    def __init__(self, db_path: str):
        """Connect to the SQLite file at ``db_path`` and make sure the event table exists."""
        self._db_path = db_path
        self._db = SqliteDatabase(db_path)
        database = self._db

        class _Base(Model):
            class Meta:
                pass

        _Base._meta.database = database

        class Event(_Base):  # type: ignore
            event_id = CharField(unique=True)
            event_type = CharField()
            data = JSONField()
            # NOTE(review): publish_event stores time.time() (a float) here and
            # cleanup_old_events compares against a float cutoff.
            timestamp = DateTimeField()
            process_id = CharField()
            processed = CharField(default="false")  # "false"/"true" flag kept as text

        self._Event = Event
        self._db.connect()
        self._db.create_tables([Event])

    def publish_event(self, event_type: str, data: Any, process_id: str) -> None:
        """Store a new, unprocessed event; failures are logged as warnings and swallowed.

        Args:
            event_type: Type label stored with the event.
            data: Payload; objects exposing ``model_dump`` are serialized through it.
            process_id: Identifier of the publisher, stored with the event.
        """
        try:
            # Serialize data, handling pydantic models
            payload = data.model_dump(mode="json", exclude_none=True) if hasattr(data, "model_dump") else data

            self._Event.create(
                event_id=str(uuid4()),
                event_type=event_type,
                data=payload,
                timestamp=time.time(),
                process_id=process_id,
                processed="false",
            )
        except Exception as e:
            logger.warning(f"Failed to publish event to database: {e}")

    def get_unprocessed_events(self, process_id: str) -> List[dict]:
        """Return unprocessed events published under a different ``process_id``, oldest first.

        Returns an empty list (after logging a warning) if the query fails.
        """
        Event = self._Event
        try:
            from_others = Event.process_id != process_id
            still_pending = Event.processed == "false"
            pending = Event.select().where(from_others & still_pending).order_by(Event.timestamp)
            return [
                {
                    "event_id": record.event_id,
                    "event_type": record.event_type,
                    "data": record.data,
                    "timestamp": record.timestamp,
                    "process_id": record.process_id,
                }
                for record in pending
            ]
        except Exception as e:
            logger.warning(f"Failed to get unprocessed events: {e}")
            return []

    def mark_event_processed(self, event_id: str) -> None:
        """Flag the event with ``event_id`` as processed; failures are logged at debug level."""
        Event = self._Event
        try:
            Event.update(processed="true").where(Event.event_id == event_id).execute()
        except Exception as e:
            logger.debug(f"Failed to mark event as processed: {e}")

    def cleanup_old_events(self, max_age_hours: int = 24) -> None:
        """Delete processed events older than ``max_age_hours``; unprocessed events are kept."""
        Event = self._Event
        try:
            cutoff_time = time.time() - (max_age_hours * 3600)
            already_handled = Event.processed == "true"
            too_old = Event.timestamp < cutoff_time
            Event.delete().where(already_handled & too_old).execute()
        except Exception as e:
            logger.debug(f"Failed to cleanup old events: {e}")
|
|
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional
|
|
|
11
11
|
|
|
12
12
|
import aiohttp
|
|
13
13
|
from omegaconf import DictConfig
|
|
14
|
-
from pydantic import BaseModel
|
|
14
|
+
from pydantic import BaseModel # Added for new models
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
@@ -83,6 +83,9 @@ class FireworksModelClient(ModelClient):
|
|
|
83
83
|
}
|
|
84
84
|
if self.top_p is not None:
|
|
85
85
|
payload["top_p"] = self.top_p
|
|
86
|
+
# Include reasoning settings if configured (for reasoning-capable models)
|
|
87
|
+
if self.reasoning_effort:
|
|
88
|
+
payload["reasoning_effort"] = self.reasoning_effort
|
|
86
89
|
|
|
87
90
|
if tools:
|
|
88
91
|
payload["tools"] = tools
|
{eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/client/connection.py
RENAMED
|
@@ -9,14 +9,16 @@ import asyncio
|
|
|
9
9
|
import hashlib
|
|
10
10
|
import json
|
|
11
11
|
import logging
|
|
12
|
+
import time
|
|
12
13
|
from contextlib import AsyncExitStack
|
|
13
14
|
from typing import Any, Dict, List, Optional, Tuple
|
|
14
15
|
|
|
16
|
+
import httpx
|
|
15
17
|
from mcp.client.session import ClientSession
|
|
16
18
|
from mcp.client.streamable_http import streamablehttp_client
|
|
19
|
+
from mcp.types import Implementation
|
|
17
20
|
|
|
18
21
|
from ...types import MCPSession
|
|
19
|
-
from mcp.types import Implementation
|
|
20
22
|
|
|
21
23
|
logger = logging.getLogger(__name__)
|
|
22
24
|
|
|
@@ -109,15 +111,13 @@ class MCPConnectionManager:
|
|
|
109
111
|
"""
|
|
110
112
|
Clean session data in remote mcp server for the given session
|
|
111
113
|
"""
|
|
112
|
-
import httpx
|
|
113
|
-
|
|
114
114
|
base_url = session.base_url.rstrip("/").removesuffix("/mcp")
|
|
115
115
|
url = f"{base_url}/control/reset_session"
|
|
116
116
|
|
|
117
117
|
headers = {"mcp-session-id": session.session_id}
|
|
118
118
|
body = {"seed": session.seed}
|
|
119
119
|
|
|
120
|
-
timeout = httpx.Timeout(
|
|
120
|
+
timeout = httpx.Timeout(15.0)
|
|
121
121
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
122
122
|
resp = await client.post(url, headers=headers, json=body)
|
|
123
123
|
resp.raise_for_status()
|
|
@@ -202,8 +202,6 @@ class MCPConnectionManager:
|
|
|
202
202
|
initial_observation = None
|
|
203
203
|
|
|
204
204
|
try:
|
|
205
|
-
import httpx
|
|
206
|
-
|
|
207
205
|
# Extract base URL and session ID from the MCP session
|
|
208
206
|
base_url = session.base_url.rstrip("/").removesuffix("/mcp")
|
|
209
207
|
session_id = session.session_id
|
|
@@ -459,9 +457,6 @@ class MCPConnectionManager:
|
|
|
459
457
|
control_plane_info = {}
|
|
460
458
|
|
|
461
459
|
try:
|
|
462
|
-
# Query control plane endpoints following the new architecture
|
|
463
|
-
import httpx
|
|
464
|
-
|
|
465
460
|
# Extract base URL and session ID from the MCP session
|
|
466
461
|
base_url = session.base_url.rstrip("/").removesuffix("/mcp")
|
|
467
462
|
# Use the session ID from the established MCP session
|
|
@@ -544,10 +539,10 @@ class MCPConnectionManager:
|
|
|
544
539
|
await session._exit_stack.aclose()
|
|
545
540
|
except asyncio.CancelledError:
|
|
546
541
|
# Handle cancellation gracefully (especially important for Python 3.12)
|
|
547
|
-
logger.
|
|
542
|
+
logger.error(f"Session {session.session_id} close was cancelled")
|
|
548
543
|
except Exception as e:
|
|
549
544
|
# Hitting this error, probably because of use of threads: "Attempted to exit cancel scope in a different task than it was entered in"
|
|
550
|
-
logger.
|
|
545
|
+
logger.error(f"Error closing session {session.session_id}: {e}")
|
|
551
546
|
finally:
|
|
552
547
|
session._exit_stack = None
|
|
553
548
|
session._mcp_session = None
|
{eval_protocol-0.2.6.dev1 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/base_policy.py
RENAMED
|
@@ -220,7 +220,7 @@ class LLMBasePolicy(PlaybackPolicyBase, ABC):
|
|
|
220
220
|
return mcp_tool_calls, usage_stats
|
|
221
221
|
else:
|
|
222
222
|
# No tool calls in response - this is normal when episode ends or LLM provides only text
|
|
223
|
-
logger.
|
|
223
|
+
logger.debug(f"No tool calls in response for env {env_index}, message content: {message.get('content')}")
|
|
224
224
|
return [
|
|
225
225
|
MCPToolCall(
|
|
226
226
|
tool_name="_no_tool_call",
|