eval-protocol 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.5/eval_protocol.egg-info → eval_protocol-0.2.6}/PKG-INFO +3 -4
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/README.md +1 -1
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/manager.py +14 -22
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/policy.py +11 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_env.py +4 -3
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +12 -5
- {eval_protocol-0.2.5 → eval_protocol-0.2.6/eval_protocol.egg-info}/PKG-INFO +3 -4
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/SOURCES.txt +6 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/requires.txt +1 -2
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/pyproject.toml +5 -3
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_rollout_control_plane_integration.py +1 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_nl_assertions.py +27 -10
- eval_protocol-0.2.6/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- eval_protocol-0.2.6/vite-app/dist/assets/index-BySN1scz.css +1 -0
- eval_protocol-0.2.6/vite-app/dist/assets/index-CRkZ6JGL.js +88 -0
- eval_protocol-0.2.6/vite-app/dist/assets/index-CRkZ6JGL.js.map +1 -0
- eval_protocol-0.2.6/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- eval_protocol-0.2.6/vite-app/dist/index.html +14 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/LICENSE +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/braintrust.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/mcp_agent/session.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/setup.cfg +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/setup.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_braintrust_adapter.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_braintrust_example.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_config.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_format.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_frozen_lake_http_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_frozen_lake_seed_evaluation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_length.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_math.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_models.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.5 → eval_protocol-0.2.6}/versioneer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,7 +40,6 @@ Requires-Dist: deepdiff>=6.0.0
|
|
|
40
40
|
Requires-Dist: pandas>=1.5.0
|
|
41
41
|
Requires-Dist: watchdog>=2.1.0
|
|
42
42
|
Requires-Dist: websockets>=15.0.1
|
|
43
|
-
Requires-Dist: fireworks-ai>=0.19.12
|
|
44
43
|
Requires-Dist: fastapi>=0.116.1
|
|
45
44
|
Provides-Extra: dev
|
|
46
45
|
Requires-Dist: build; extra == "dev"
|
|
@@ -79,7 +78,7 @@ Requires-Dist: accelerate>=0.28.0; extra == "trl"
|
|
|
79
78
|
Provides-Extra: openevals
|
|
80
79
|
Requires-Dist: openevals>=0.1.0; extra == "openevals"
|
|
81
80
|
Provides-Extra: fireworks
|
|
82
|
-
Requires-Dist: fireworks-ai>=0.19.
|
|
81
|
+
Requires-Dist: fireworks-ai>=0.19.12; extra == "fireworks"
|
|
83
82
|
Provides-Extra: box2d
|
|
84
83
|
Requires-Dist: swig; extra == "box2d"
|
|
85
84
|
Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
|
|
@@ -110,7 +109,7 @@ markdown generation tasks to customer service agents with tool calling
|
|
|
110
109
|
capabilities.
|
|
111
110
|
|
|
112
111
|
<p align="center">
|
|
113
|
-
<img src="
|
|
112
|
+
<img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
|
|
114
113
|
<br>
|
|
115
114
|
<sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
|
|
116
115
|
</p>
|
|
@@ -13,7 +13,7 @@ markdown generation tasks to customer service agents with tool calling
|
|
|
13
13
|
capabilities.
|
|
14
14
|
|
|
15
15
|
<p align="center">
|
|
16
|
-
<img src="
|
|
16
|
+
<img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
|
|
17
17
|
<br>
|
|
18
18
|
<sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
|
|
19
19
|
</p>
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-08-
|
|
11
|
+
"date": "2025-08-06T23:10:26-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.5"
|
|
14
|
+
"full-revisionid": "fffd75c146b297cbce37f768ca9850e2ee05e4b5",
|
|
15
|
+
"version": "0.2.6"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -42,6 +42,7 @@ class ExecutionManager:
|
|
|
42
42
|
steps: int = 512,
|
|
43
43
|
openai_format_log_file: Optional[str] = None,
|
|
44
44
|
max_concurrent_rollouts: int = 8,
|
|
45
|
+
evaluation_rows: Optional[List[EvaluationRow]] = None,
|
|
45
46
|
) -> List[EvaluationRow]:
|
|
46
47
|
"""
|
|
47
48
|
Execute general rollouts using tool calling interface with automatic record/playback.
|
|
@@ -135,9 +136,11 @@ class ExecutionManager:
|
|
|
135
136
|
# Add note about control plane separation
|
|
136
137
|
logger.info(f"🎛️ Trajectories include control plane separation")
|
|
137
138
|
|
|
138
|
-
# Convert trajectories to unified EvaluationRow format
|
|
139
|
-
evaluation_rows
|
|
140
|
-
|
|
139
|
+
# Convert trajectories to unified EvaluationRow format. If no evaluation_rows are provided, create empty ones for backwards compatibility.
|
|
140
|
+
if evaluation_rows is None:
|
|
141
|
+
evaluation_rows = [EvaluationRow(messages=[], input_metadata=InputMetadata()) for _ in trajectories]
|
|
142
|
+
|
|
143
|
+
for idx, trajectory in enumerate(trajectories):
|
|
141
144
|
# Handle multimodal content by extracting text from complex content structures
|
|
142
145
|
messages = []
|
|
143
146
|
for msg in trajectory.conversation_history:
|
|
@@ -155,26 +158,15 @@ class ExecutionManager:
|
|
|
155
158
|
|
|
156
159
|
messages.append(Message.model_validate(msg_dict))
|
|
157
160
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
),
|
|
167
|
-
session_data={
|
|
168
|
-
"timestamp": time.time(),
|
|
169
|
-
},
|
|
170
|
-
)
|
|
171
|
-
evaluation_row = EvaluationRow(
|
|
172
|
-
messages=messages,
|
|
173
|
-
tools=shared_tool_schema,
|
|
174
|
-
input_metadata=input_metadata,
|
|
175
|
-
usage=trajectory.usage,
|
|
161
|
+
evaluation_rows[idx].messages = messages
|
|
162
|
+
evaluation_rows[idx].tools = shared_tool_schema
|
|
163
|
+
evaluation_rows[idx].usage = trajectory.usage
|
|
164
|
+
evaluation_rows[idx].input_metadata.completion_params = CompletionParams(
|
|
165
|
+
model=policy.model_id,
|
|
166
|
+
temperature=getattr(policy, "temperature", None),
|
|
167
|
+
max_tokens=getattr(policy, "max_tokens", None),
|
|
168
|
+
max_tool_calls=getattr(policy, "max_tools_per_turn", None),
|
|
176
169
|
)
|
|
177
|
-
evaluation_rows.append(evaluation_row)
|
|
178
170
|
|
|
179
171
|
return evaluation_rows
|
|
180
172
|
|
|
@@ -64,6 +64,9 @@ class LiteLLMPolicy(LLMBasePolicy):
|
|
|
64
64
|
self.num_retries = num_retries
|
|
65
65
|
self.retry_strategy = retry_strategy
|
|
66
66
|
|
|
67
|
+
# Store additional API parameters from kwargs
|
|
68
|
+
self.additional_params = kwargs
|
|
69
|
+
|
|
67
70
|
# Only initialize LiteLLM in live mode (not in playback mode)
|
|
68
71
|
if not self._is_playback:
|
|
69
72
|
self._setup_litellm_caching(use_caching, cache_type, redis_url)
|
|
@@ -166,6 +169,14 @@ class LiteLLMPolicy(LLMBasePolicy):
|
|
|
166
169
|
"base_url": self.base_url,
|
|
167
170
|
}
|
|
168
171
|
|
|
172
|
+
# Add additional parameters from kwargs (like reasoning_effort)
|
|
173
|
+
if self.additional_params:
|
|
174
|
+
request_params.update(self.additional_params)
|
|
175
|
+
|
|
176
|
+
# Tell LiteLLM to allow reasoning_effort if it's present
|
|
177
|
+
if "reasoning_effort" in self.additional_params:
|
|
178
|
+
request_params["allowed_openai_params"] = ["reasoning_effort"]
|
|
179
|
+
|
|
169
180
|
# Add tools if provided
|
|
170
181
|
if tools:
|
|
171
182
|
request_params["tools"] = tools
|
|
@@ -40,6 +40,8 @@ MCP Integration:
|
|
|
40
40
|
- Resources provide static/configuration data, tools provide dynamic actions
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
+
import asyncio
|
|
44
|
+
|
|
43
45
|
# For legacy compatibility - import the facade functions
|
|
44
46
|
import logging
|
|
45
47
|
import random
|
|
@@ -47,11 +49,10 @@ from typing import Any, Callable, Dict, List, Optional, Union
|
|
|
47
49
|
|
|
48
50
|
# Import all functionality from the new modular components
|
|
49
51
|
from .mcp.execution.manager import ExecutionManager
|
|
50
|
-
from .mcp.execution.policy import AnthropicPolicy, FireworksPolicy, LLMBasePolicy, OpenAIPolicy
|
|
52
|
+
from .mcp.execution.policy import AnthropicPolicy, FireworksPolicy, LiteLLMPolicy, LLMBasePolicy, OpenAIPolicy
|
|
51
53
|
from .mcp.session.manager import GeneralMCPVectorEnv
|
|
52
54
|
from .models import EvaluationRow
|
|
53
55
|
from .types import DatasetRow, MCPSession, MCPToolCall
|
|
54
|
-
import asyncio
|
|
55
56
|
|
|
56
57
|
logger = logging.getLogger(__name__)
|
|
57
58
|
|
|
@@ -288,7 +289,7 @@ async def rollout(
|
|
|
288
289
|
execution_manager = ExecutionManager()
|
|
289
290
|
|
|
290
291
|
return await execution_manager.execute_rollouts(
|
|
291
|
-
envs, policy, steps, openai_format_log_file, max_concurrent_rollouts
|
|
292
|
+
envs, policy, steps, openai_format_log_file, max_concurrent_rollouts, evaluation_rows
|
|
292
293
|
)
|
|
293
294
|
|
|
294
295
|
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import atexit
|
|
2
3
|
import os
|
|
4
|
+
import signal
|
|
5
|
+
import socket
|
|
3
6
|
import subprocess
|
|
4
7
|
import time
|
|
5
|
-
import socket
|
|
6
8
|
from pathlib import Path
|
|
7
9
|
from typing import List, Optional
|
|
8
10
|
|
|
@@ -10,9 +12,6 @@ import eval_protocol as ep
|
|
|
10
12
|
from eval_protocol.models import EvaluationRow, Message
|
|
11
13
|
from eval_protocol.pytest.types import RolloutProcessorConfig
|
|
12
14
|
|
|
13
|
-
import atexit
|
|
14
|
-
import signal
|
|
15
|
-
|
|
16
15
|
|
|
17
16
|
class MCPServerManager:
|
|
18
17
|
"""Manages MCP server lifecycle for testing."""
|
|
@@ -188,13 +187,16 @@ async def default_mcp_gym_rollout_processor(
|
|
|
188
187
|
"""
|
|
189
188
|
Rollout processor for tau bench environments.
|
|
190
189
|
|
|
190
|
+
|
|
191
191
|
This processor starts an MCP server, creates tau bench environments, and runs rollouts
|
|
192
192
|
using the eval_protocol framework, following the pattern from test_tau2_e2e.py.
|
|
193
193
|
|
|
194
|
+
|
|
194
195
|
Args:
|
|
195
196
|
rows: List of EvaluationRow objects containing messages and dataset info in input_metadata
|
|
196
197
|
config: RolloutProcessorConfig with model and other parameters
|
|
197
198
|
|
|
199
|
+
|
|
198
200
|
Returns:
|
|
199
201
|
List of EvaluationRow objects with completed conversations
|
|
200
202
|
"""
|
|
@@ -207,6 +209,7 @@ async def default_mcp_gym_rollout_processor(
|
|
|
207
209
|
model_id=config.model,
|
|
208
210
|
temperature=config.input_params.get("temperature", 0.0),
|
|
209
211
|
max_tokens=config.input_params.get("max_tokens", 4096),
|
|
212
|
+
reasoning_effort=config.input_params.get("reasoning_effort", None),
|
|
210
213
|
)
|
|
211
214
|
|
|
212
215
|
# Create MCP environments directly from evaluation_rows
|
|
@@ -218,7 +221,11 @@ async def default_mcp_gym_rollout_processor(
|
|
|
218
221
|
|
|
219
222
|
# Run rollout with environments and policy
|
|
220
223
|
evaluation_rows = await ep.rollout(
|
|
221
|
-
envs,
|
|
224
|
+
envs,
|
|
225
|
+
policy=policy,
|
|
226
|
+
evaluation_rows=rows,
|
|
227
|
+
steps=config.steps,
|
|
228
|
+
max_concurrent_rollouts=config.max_concurrent_rollouts,
|
|
222
229
|
)
|
|
223
230
|
|
|
224
231
|
return evaluation_rows
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,7 +40,6 @@ Requires-Dist: deepdiff>=6.0.0
|
|
|
40
40
|
Requires-Dist: pandas>=1.5.0
|
|
41
41
|
Requires-Dist: watchdog>=2.1.0
|
|
42
42
|
Requires-Dist: websockets>=15.0.1
|
|
43
|
-
Requires-Dist: fireworks-ai>=0.19.12
|
|
44
43
|
Requires-Dist: fastapi>=0.116.1
|
|
45
44
|
Provides-Extra: dev
|
|
46
45
|
Requires-Dist: build; extra == "dev"
|
|
@@ -79,7 +78,7 @@ Requires-Dist: accelerate>=0.28.0; extra == "trl"
|
|
|
79
78
|
Provides-Extra: openevals
|
|
80
79
|
Requires-Dist: openevals>=0.1.0; extra == "openevals"
|
|
81
80
|
Provides-Extra: fireworks
|
|
82
|
-
Requires-Dist: fireworks-ai>=0.19.
|
|
81
|
+
Requires-Dist: fireworks-ai>=0.19.12; extra == "fireworks"
|
|
83
82
|
Provides-Extra: box2d
|
|
84
83
|
Requires-Dist: swig; extra == "box2d"
|
|
85
84
|
Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
|
|
@@ -110,7 +109,7 @@ markdown generation tasks to customer service agents with tool calling
|
|
|
110
109
|
capabilities.
|
|
111
110
|
|
|
112
111
|
<p align="center">
|
|
113
|
-
<img src="
|
|
112
|
+
<img src="https://raw.githubusercontent.com/eval-protocol/python-sdk/refs/heads/main/assets/ui.png" alt="UI" />
|
|
114
113
|
<br>
|
|
115
114
|
<sub><b>Log Viewer: Monitor your evaluation rollouts in real time.</b></sub>
|
|
116
115
|
</p>
|
|
@@ -35,6 +35,12 @@ eval_protocol.egg-info/dependency_links.txt
|
|
|
35
35
|
eval_protocol.egg-info/entry_points.txt
|
|
36
36
|
eval_protocol.egg-info/requires.txt
|
|
37
37
|
eval_protocol.egg-info/top_level.txt
|
|
38
|
+
eval_protocol/../vite-app/dist/index.html
|
|
39
|
+
eval_protocol/../vite-app/dist/assets/favicon-BkAAWQga.png
|
|
40
|
+
eval_protocol/../vite-app/dist/assets/index-BySN1scz.css
|
|
41
|
+
eval_protocol/../vite-app/dist/assets/index-CRkZ6JGL.js
|
|
42
|
+
eval_protocol/../vite-app/dist/assets/index-CRkZ6JGL.js.map
|
|
43
|
+
eval_protocol/../vite-app/dist/assets/logo-light-BprIBJQW.png
|
|
38
44
|
eval_protocol/adapters/__init__.py
|
|
39
45
|
eval_protocol/adapters/braintrust.py
|
|
40
46
|
eval_protocol/adapters/huggingface.py
|
|
@@ -28,7 +28,6 @@ deepdiff>=6.0.0
|
|
|
28
28
|
pandas>=1.5.0
|
|
29
29
|
watchdog>=2.1.0
|
|
30
30
|
websockets>=15.0.1
|
|
31
|
-
fireworks-ai>=0.19.12
|
|
32
31
|
fastapi>=0.116.1
|
|
33
32
|
|
|
34
33
|
[adapters]
|
|
@@ -71,7 +70,7 @@ pip>=25.1.1
|
|
|
71
70
|
haikus==0.3.8
|
|
72
71
|
|
|
73
72
|
[fireworks]
|
|
74
|
-
fireworks-ai>=0.19.
|
|
73
|
+
fireworks-ai>=0.19.12
|
|
75
74
|
|
|
76
75
|
[huggingface]
|
|
77
76
|
datasets>=2.0.0
|
|
@@ -48,7 +48,6 @@ dependencies = [
|
|
|
48
48
|
"pandas>=1.5.0",
|
|
49
49
|
"watchdog>=2.1.0",
|
|
50
50
|
"websockets>=15.0.1",
|
|
51
|
-
"fireworks-ai>=0.19.12",
|
|
52
51
|
"fastapi>=0.116.1",
|
|
53
52
|
]
|
|
54
53
|
|
|
@@ -96,7 +95,7 @@ openevals = [
|
|
|
96
95
|
"openevals>=0.1.0",
|
|
97
96
|
]
|
|
98
97
|
fireworks = [
|
|
99
|
-
"fireworks-ai>=0.19.
|
|
98
|
+
"fireworks-ai>=0.19.12",
|
|
100
99
|
]
|
|
101
100
|
box2d = [
|
|
102
101
|
"swig",
|
|
@@ -112,7 +111,7 @@ huggingface = [
|
|
|
112
111
|
]
|
|
113
112
|
adapters = [
|
|
114
113
|
"langfuse>=2.0.0",
|
|
115
|
-
"datasets>=2.0.0",
|
|
114
|
+
"datasets>=2.0.0",
|
|
116
115
|
"transformers>=4.0.0",
|
|
117
116
|
]
|
|
118
117
|
|
|
@@ -123,6 +122,9 @@ eval-protocol = "eval_protocol.cli:main"
|
|
|
123
122
|
[tool.setuptools.packages.find]
|
|
124
123
|
include = ["eval_protocol*", "development*", "vendor*"]
|
|
125
124
|
|
|
125
|
+
[tool.setuptools.package-data]
|
|
126
|
+
"eval_protocol" = ["../vite-app/dist/**/*"]
|
|
127
|
+
|
|
126
128
|
[tool.versioneer]
|
|
127
129
|
VCS = "git"
|
|
128
130
|
style = "pep440"
|
{eval_protocol-0.2.5 → eval_protocol-0.2.6}/vendor/tau2/evaluator/evaluator_nl_assertions.py
RENAMED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
2
5
|
|
|
3
6
|
from vendor.tau2.config import DEFAULT_LLM_NL_ASSERTIONS, DEFAULT_LLM_NL_ASSERTIONS_ARGS
|
|
4
7
|
from vendor.tau2.data_model.message import Message, SystemMessage, UserMessage
|
|
@@ -7,6 +10,20 @@ from vendor.tau2.data_model.tasks import RewardType, Task
|
|
|
7
10
|
from vendor.tau2.utils.llm_utils import generate
|
|
8
11
|
|
|
9
12
|
|
|
13
|
+
class NLAssertionResult(BaseModel):
|
|
14
|
+
"""Individual NL assertion evaluation result."""
|
|
15
|
+
|
|
16
|
+
expectedOutcome: str
|
|
17
|
+
reasoning: str
|
|
18
|
+
metExpectation: bool
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class NLAssertionsResponse(BaseModel):
|
|
22
|
+
"""Complete NL assertions evaluation response."""
|
|
23
|
+
|
|
24
|
+
results: List[NLAssertionResult]
|
|
25
|
+
|
|
26
|
+
|
|
10
27
|
class NLAssertionsEvaluator:
|
|
11
28
|
"""
|
|
12
29
|
Judge that evaluates whether a trajectory adheres to all the natural-language assertions.
|
|
@@ -37,9 +54,7 @@ class NLAssertionsEvaluator:
|
|
|
37
54
|
reward_breakdown={RewardType.NL_ASSERTION: 1.0},
|
|
38
55
|
)
|
|
39
56
|
|
|
40
|
-
nl_assertions_checks = cls.evaluate_nl_assertions(
|
|
41
|
-
full_trajectory, nl_assertions
|
|
42
|
-
)
|
|
57
|
+
nl_assertions_checks = cls.evaluate_nl_assertions(full_trajectory, nl_assertions)
|
|
43
58
|
|
|
44
59
|
# Calculate reward: 1 if all expectations are met, 0 otherwise
|
|
45
60
|
all_expectations_met = all(result.met for result in nl_assertions_checks)
|
|
@@ -70,9 +85,7 @@ class NLAssertionsEvaluator:
|
|
|
70
85
|
- metExpectation: Boolean indicating if the assertion was met
|
|
71
86
|
- reasoning: Explanation for the evaluation
|
|
72
87
|
"""
|
|
73
|
-
trajectory_str = "\n".join(
|
|
74
|
-
[f"{message.role}: {message.content}" for message in trajectory]
|
|
75
|
-
)
|
|
88
|
+
trajectory_str = "\n".join([f"{message.role}: {message.content}" for message in trajectory])
|
|
76
89
|
# System prompt similar to the TypeScript implementation
|
|
77
90
|
system_prompt = """
|
|
78
91
|
TASK
|
|
@@ -86,7 +99,7 @@ class NLAssertionsEvaluator:
|
|
|
86
99
|
- `reasoning`: a short explanation for your classification
|
|
87
100
|
- `metExpectation`: `true` if the agent satisfies the expected outcomes, `false` otherwise
|
|
88
101
|
- `expectedOutcome`: repeat the expectation from the input that you are grading
|
|
89
|
-
|
|
102
|
+
|
|
90
103
|
Example response structure:
|
|
91
104
|
{
|
|
92
105
|
"results": [
|
|
@@ -102,7 +115,7 @@ class NLAssertionsEvaluator:
|
|
|
102
115
|
user_prompt = f"""
|
|
103
116
|
conversation:
|
|
104
117
|
{trajectory_str}
|
|
105
|
-
|
|
118
|
+
|
|
106
119
|
expectedOutcomes:
|
|
107
120
|
{nl_assertions}
|
|
108
121
|
"""
|
|
@@ -115,8 +128,12 @@ class NLAssertionsEvaluator:
|
|
|
115
128
|
assistant_message = generate(
|
|
116
129
|
model=DEFAULT_LLM_NL_ASSERTIONS,
|
|
117
130
|
messages=messages,
|
|
118
|
-
|
|
119
|
-
|
|
131
|
+
temperature=0.0,
|
|
132
|
+
response_format={
|
|
133
|
+
"type": "json_schema",
|
|
134
|
+
"json_schema": {"name": "NLAssertionsResponse", "schema": NLAssertionsResponse.model_json_schema()},
|
|
135
|
+
},
|
|
136
|
+
) # Adding constrained generation to ensure the response is a valid JSON object
|
|
120
137
|
result_data = json.loads(assistant_message.content)
|
|
121
138
|
return [
|
|
122
139
|
NLAssertionCheck(
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/*! tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-500:oklch(63.7% .237 25.331);--color-red-700:oklch(50.5% .213 27.518);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-700:oklch(55.4% .135 66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% .018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 247.839);--color-gray-100:oklch(96.7% .003 
264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier 
New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 
50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.visible{visibility:visible}.absolute{position:absolute}.relative{position:relative}.static{position:static}.top-0{top:calc(var(--spacing)*0)}.right-0{right:calc(var(--spacing)*0)}.left-0{left:calc(var(--spacing)*0)}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media (min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media (min-width:80rem){.container{max-width:80rem}}@media 
(min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.contents{display:contents}.flex{display:flex}.hidden{display:none}.inline-flex{display:inline-flex}.table{display:table}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-6{height:calc(var(--spacing)*6)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-7xl{max-width:var(--container-7xl)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-max{min-width:max-content}.flex-shrink-0{flex-shrink:0}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.items-center{align-items:center}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1\.5{gap:calc(var(--spacing)*1.5)}.gap-2{gap:calc(var(--spacing)*2)}.gap-3{gap:calc(var(--spacing)*3)}:where(.space-y-1>:not(:last-child)){--tw-sp
ace-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*1)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*1)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.border{border-style:var(--tw-border-style);border-width:1px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-green-200{border-color:var(--color-green-200)}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-transparent{border-top-color:#0000}.bg-blue-50{background-color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-300{background-color:var(--color-gray-3
00)}.bg-gray-500{background-color:var(--color-gray-500)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-500{background-color:var(--color-red-500)}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.p-0{padding:calc(var(--spacing)*0)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{padding:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.pt-1{padding-top:calc(var(--spacing)*1)}.text-center{text-align:center}.text-left{text-align:left}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.break-words{overflow-wrap:break-word}.break-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-700{color:var(--color-green-700)
}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-700{color:var(--color-red-700)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.underline{text-decoration-line:underline}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webkit-user-select:none;user-sele
ct:none}@media (hover:hover){.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:no-underline:hover{text-decoration-line:none}}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property --tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}}
|