eval-protocol 0.2.6.dev2__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.6.dev2/eval_protocol.egg-info → eval_protocol-0.2.7}/PKG-INFO +1 -1
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +3 -4
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +14 -11
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/default_agent_rollout_processor.py +5 -4
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/default_single_turn_rollout_process.py +4 -5
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/evaluation_test.py +29 -3
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/types.py +8 -2
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7/eval_protocol.egg-info}/PKG-INFO +1 -1
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol.egg-info/SOURCES.txt +3 -3
- eval_protocol-0.2.7/vite-app/dist/assets/index-DWfIf2rx.css +1 -0
- eval_protocol-0.2.7/vite-app/dist/assets/index-D_nkLTVA.js +88 -0
- eval_protocol-0.2.7/vite-app/dist/assets/index-D_nkLTVA.js.map +1 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vite-app/dist/index.html +2 -2
- eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-D9iVTBbF.css +0 -1
- eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-DiF_B1x_.js +0 -88
- eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-DiF_B1x_.js.map +0 -1
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/LICENSE +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/README.md +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/development/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/integrations/braintrust.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_agent/session.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/pytest/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol.egg-info/requires.txt +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/pyproject.toml +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/setup.cfg +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/setup.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_braintrust_adapter.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_braintrust_example.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_config.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_format.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_frozen_lake_http_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_frozen_lake_seed_evaluation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_length.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_math.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_models.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/versioneer.py +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.6.dev2 → eval_protocol-0.2.7}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.6.dev2
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-08-
|
|
11
|
+
"date": "2025-08-11T00:47:52-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.6.dev2"
|
|
14
|
+
"full-revisionid": "38a44449f6d48a8a79eb11a0aaf873129df3e994",
|
|
15
|
+
"version": "0.2.7"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -22,9 +22,8 @@ class SqliteDatasetLoggerAdapter(DatasetLogger):
|
|
|
22
22
|
self._store = SqliteEvaluationRowStore(self.db_path)
|
|
23
23
|
|
|
24
24
|
def log(self, row: "EvaluationRow") -> None:
|
|
25
|
-
row_id = row.input_metadata.row_id
|
|
26
25
|
data = row.model_dump(exclude_none=True, mode="json")
|
|
27
|
-
self._store.upsert_row(
|
|
26
|
+
self._store.upsert_row(data=data)
|
|
28
27
|
try:
|
|
29
28
|
event_bus.emit(LOG_EVENT_TYPE, EvaluationRow(**data))
|
|
30
29
|
except Exception as e:
|
|
@@ -32,8 +31,8 @@ class SqliteDatasetLoggerAdapter(DatasetLogger):
|
|
|
32
31
|
logger.error(f"Failed to emit row_upserted event: {e}")
|
|
33
32
|
pass
|
|
34
33
|
|
|
35
|
-
def read(self,
|
|
34
|
+
def read(self, rollout_id: Optional[str] = None) -> List["EvaluationRow"]:
|
|
36
35
|
from eval_protocol.models import EvaluationRow
|
|
37
36
|
|
|
38
|
-
results = self._store.read_rows(
|
|
37
|
+
results = self._store.read_rows(rollout_id=rollout_id)
|
|
39
38
|
return [EvaluationRow(**data) for data in results]
|
|
@@ -11,7 +11,7 @@ class SqliteEvaluationRowStore:
|
|
|
11
11
|
"""
|
|
12
12
|
Lightweight reusable SQLite store for evaluation rows.
|
|
13
13
|
|
|
14
|
-
Stores arbitrary row data as JSON keyed by a unique string `
|
|
14
|
+
Stores arbitrary row data as JSON keyed by a unique string `rollout_id`.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
def __init__(self, db_path: str):
|
|
@@ -24,7 +24,7 @@ class SqliteEvaluationRowStore:
|
|
|
24
24
|
database = self._db
|
|
25
25
|
|
|
26
26
|
class EvaluationRow(BaseModel): # type: ignore
|
|
27
|
-
|
|
27
|
+
rollout_id = CharField(unique=True)
|
|
28
28
|
data = JSONField()
|
|
29
29
|
|
|
30
30
|
self._EvaluationRow = EvaluationRow
|
|
@@ -36,22 +36,25 @@ class SqliteEvaluationRowStore:
|
|
|
36
36
|
def db_path(self) -> str:
|
|
37
37
|
return self._db_path
|
|
38
38
|
|
|
39
|
-
def upsert_row(self,
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
def upsert_row(self, data: dict) -> None:
|
|
40
|
+
rollout_id = data["rollout_id"]
|
|
41
|
+
if "rollout_id" not in data:
|
|
42
|
+
raise ValueError("rollout_id is required to upsert a row")
|
|
43
|
+
if self._EvaluationRow.select().where(self._EvaluationRow.rollout_id == rollout_id).exists():
|
|
44
|
+
self._EvaluationRow.update(data=data).where(self._EvaluationRow.rollout_id == rollout_id).execute()
|
|
42
45
|
else:
|
|
43
|
-
self._EvaluationRow.create(
|
|
46
|
+
self._EvaluationRow.create(rollout_id=rollout_id, data=data)
|
|
44
47
|
|
|
45
|
-
def read_rows(self,
|
|
46
|
-
if
|
|
48
|
+
def read_rows(self, rollout_id: Optional[str] = None) -> List[dict]:
|
|
49
|
+
if rollout_id is None:
|
|
47
50
|
query = self._EvaluationRow.select().dicts()
|
|
48
51
|
else:
|
|
49
|
-
query = self._EvaluationRow.select().dicts().where(self._EvaluationRow.
|
|
52
|
+
query = self._EvaluationRow.select().dicts().where(self._EvaluationRow.rollout_id == rollout_id)
|
|
50
53
|
results = list(query)
|
|
51
54
|
return [result["data"] for result in results]
|
|
52
55
|
|
|
53
|
-
def delete_row(self,
|
|
54
|
-
return self._EvaluationRow.delete().where(self._EvaluationRow.
|
|
56
|
+
def delete_row(self, rollout_id: str) -> int:
|
|
57
|
+
return self._EvaluationRow.delete().where(self._EvaluationRow.rollout_id == rollout_id).execute()
|
|
55
58
|
|
|
56
59
|
def delete_all_rows(self) -> int:
|
|
57
60
|
return self._EvaluationRow.delete().execute()
|
|
@@ -8,7 +8,7 @@ from openai import NOT_GIVEN, NotGiven
|
|
|
8
8
|
from openai.types.chat import ChatCompletionContentPartTextParam, ChatCompletionMessage, ChatCompletionToolParam
|
|
9
9
|
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
|
|
10
10
|
|
|
11
|
-
from eval_protocol.dataset_logger import
|
|
11
|
+
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
|
|
12
12
|
from eval_protocol.mcp.execution.policy import LiteLLMPolicy
|
|
13
13
|
from eval_protocol.mcp.mcp_multi_client import MCPMultiClient
|
|
14
14
|
from eval_protocol.models import EvaluationRow, Message
|
|
@@ -20,12 +20,13 @@ class Agent:
|
|
|
20
20
|
A really simple agent that calls the model until no more tool calls are needed.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
def __init__(self, model: str, row: EvaluationRow, config_path: str):
|
|
23
|
+
def __init__(self, model: str, row: EvaluationRow, config_path: str, logger: DatasetLogger):
|
|
24
24
|
self.model = model
|
|
25
25
|
self.evaluation_row: EvaluationRow = row
|
|
26
26
|
self._policy = LiteLLMPolicy(model_id=model)
|
|
27
27
|
self.mcp_client = MCPMultiClient(config_path=config_path) if config_path else None
|
|
28
28
|
self.tools: Union[List[ChatCompletionToolParam], NotGiven] = NOT_GIVEN
|
|
29
|
+
self.logger: DatasetLogger = logger
|
|
29
30
|
|
|
30
31
|
async def setup(self):
|
|
31
32
|
if self.mcp_client:
|
|
@@ -42,7 +43,7 @@ class Agent:
|
|
|
42
43
|
|
|
43
44
|
def append_message_and_log(self, message: Message):
|
|
44
45
|
self.messages.append(message)
|
|
45
|
-
|
|
46
|
+
self.logger.log(self.evaluation_row)
|
|
46
47
|
|
|
47
48
|
async def call_agent(self) -> str:
|
|
48
49
|
"""
|
|
@@ -116,7 +117,7 @@ async def default_agent_rollout_processor(
|
|
|
116
117
|
) -> List[EvaluationRow]:
|
|
117
118
|
dataset: Dataset = []
|
|
118
119
|
for row in rows:
|
|
119
|
-
agent = Agent(model=config.model, row=row, config_path=config.mcp_config_path)
|
|
120
|
+
agent = Agent(model=config.model, row=row, config_path=config.mcp_config_path, logger=config.logger)
|
|
120
121
|
await agent.setup()
|
|
121
122
|
await agent.call_agent()
|
|
122
123
|
dataset.append(agent.evaluation_row)
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import List
|
|
3
|
-
|
|
4
2
|
import logging
|
|
5
3
|
import os
|
|
4
|
+
from typing import List
|
|
6
5
|
|
|
7
|
-
from eval_protocol.
|
|
8
|
-
from eval_protocol.models import EvaluationRow, Message, ChatCompletionMessageToolCall
|
|
6
|
+
from eval_protocol.models import ChatCompletionMessageToolCall, EvaluationRow, Message
|
|
9
7
|
from eval_protocol.pytest.types import RolloutProcessorConfig
|
|
10
8
|
|
|
11
9
|
|
|
@@ -49,6 +47,7 @@ async def default_single_turn_rollout_processor(
|
|
|
49
47
|
|
|
50
48
|
# Dynamic import to avoid static dependency/lint errors if LiteLLM isn't installed yet
|
|
51
49
|
import importlib
|
|
50
|
+
|
|
52
51
|
_litellm = importlib.import_module("litellm")
|
|
53
52
|
acompletion = getattr(_litellm, "acompletion")
|
|
54
53
|
response = await acompletion(**request_params)
|
|
@@ -79,7 +78,7 @@ async def default_single_turn_rollout_processor(
|
|
|
79
78
|
]
|
|
80
79
|
|
|
81
80
|
row.messages = messages
|
|
82
|
-
|
|
81
|
+
config.logger.log(row)
|
|
83
82
|
return row
|
|
84
83
|
|
|
85
84
|
# Process rows with bounded concurrency if configured
|
|
@@ -8,6 +8,7 @@ from typing import Any, Callable, Dict, List, Literal, Optional
|
|
|
8
8
|
import pytest
|
|
9
9
|
|
|
10
10
|
from eval_protocol.dataset_logger import default_logger
|
|
11
|
+
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
|
|
11
12
|
from eval_protocol.human_id import generate_id
|
|
12
13
|
from eval_protocol.models import CompletionParams, EvalMetadata, EvaluationRow, InputMetadata, Message
|
|
13
14
|
from eval_protocol.pytest.default_dataset_adapter import default_dataset_adapter
|
|
@@ -55,6 +56,7 @@ def evaluation_test( # noqa: C901
|
|
|
55
56
|
steps: int = 30,
|
|
56
57
|
mode: EvaluationTestMode = "batch",
|
|
57
58
|
combine_datasets: bool = True,
|
|
59
|
+
logger: Optional[DatasetLogger] = None,
|
|
58
60
|
) -> Callable[
|
|
59
61
|
[TestFunction],
|
|
60
62
|
TestFunction,
|
|
@@ -117,8 +119,11 @@ def evaluation_test( # noqa: C901
|
|
|
117
119
|
mode: Evaluation mode. "batch" (default) expects test function to handle
|
|
118
120
|
full dataset. "pointwise" applies test function to each row. If your evaluation requires
|
|
119
121
|
the full rollout of all rows to compute the score, use
|
|
122
|
+
logger: DatasetLogger to use for logging. If not provided, a default logger will be used.
|
|
120
123
|
"""
|
|
121
124
|
|
|
125
|
+
active_logger: DatasetLogger = logger if logger else default_logger
|
|
126
|
+
|
|
122
127
|
def decorator(
|
|
123
128
|
test_func: TestFunction,
|
|
124
129
|
):
|
|
@@ -287,7 +292,7 @@ def evaluation_test( # noqa: C901
|
|
|
287
292
|
def _log_eval_error(
|
|
288
293
|
status: Literal["finished", "error"], rows: Optional[List[EvaluationRow]] | None, passed: bool
|
|
289
294
|
) -> None:
|
|
290
|
-
log_eval_status_and_rows(eval_metadata, rows, status, passed,
|
|
295
|
+
log_eval_status_and_rows(eval_metadata, rows, status, passed, active_logger)
|
|
291
296
|
|
|
292
297
|
try:
|
|
293
298
|
# Handle dataset loading
|
|
@@ -369,7 +374,6 @@ def evaluation_test( # noqa: C901
|
|
|
369
374
|
# has to be done in the pytest main process since it's
|
|
370
375
|
# used to determine whether this eval has stopped
|
|
371
376
|
row.pid = os.getpid()
|
|
372
|
-
default_logger.log(row)
|
|
373
377
|
|
|
374
378
|
# Prepare rollout processor config once; we will generate fresh outputs per run
|
|
375
379
|
config = RolloutProcessorConfig(
|
|
@@ -379,6 +383,7 @@ def evaluation_test( # noqa: C901
|
|
|
379
383
|
max_concurrent_rollouts=max_concurrent_rollouts,
|
|
380
384
|
server_script_path=server_script_path,
|
|
381
385
|
steps=steps,
|
|
386
|
+
logger=active_logger,
|
|
382
387
|
)
|
|
383
388
|
|
|
384
389
|
for _ in range(num_runs):
|
|
@@ -395,6 +400,10 @@ def evaluation_test( # noqa: C901
|
|
|
395
400
|
for row in fresh_dataset:
|
|
396
401
|
row.rollout_id = generate_id()
|
|
397
402
|
|
|
403
|
+
# log the fresh_dataset
|
|
404
|
+
for row in fresh_dataset:
|
|
405
|
+
active_logger.log(row)
|
|
406
|
+
|
|
398
407
|
processed_dataset = execute_function(rollout_processor, rows=fresh_dataset, config=config)
|
|
399
408
|
|
|
400
409
|
if mode == "pointwise":
|
|
@@ -463,7 +472,7 @@ def evaluation_test( # noqa: C901
|
|
|
463
472
|
if r.eval_metadata is not None:
|
|
464
473
|
r.eval_metadata.status = "finished"
|
|
465
474
|
r.eval_metadata.passed = passed
|
|
466
|
-
|
|
475
|
+
active_logger.log(r)
|
|
467
476
|
|
|
468
477
|
# Optional: print and/or persist a summary artifact for CI
|
|
469
478
|
try:
|
|
@@ -587,6 +596,23 @@ def evaluation_test( # noqa: C901
|
|
|
587
596
|
# Do not fail evaluation if summary writing fails
|
|
588
597
|
pass
|
|
589
598
|
|
|
599
|
+
# # Write all rows from active_logger.read() to a JSONL file in the same directory as the summary
|
|
600
|
+
# try:
|
|
601
|
+
# if active_logger is not None:
|
|
602
|
+
# rows = active_logger.read()
|
|
603
|
+
# # Write to a .jsonl file alongside the summary file
|
|
604
|
+
# jsonl_path = "logs.jsonl"
|
|
605
|
+
# import json
|
|
606
|
+
|
|
607
|
+
# with open(jsonl_path, "w", encoding="utf-8") as f_jsonl:
|
|
608
|
+
# for row in rows:
|
|
609
|
+
# json.dump(row.model_dump(exclude_none=True, mode="json"), f_jsonl)
|
|
610
|
+
# f_jsonl.write("\n")
|
|
611
|
+
# except Exception as e:
|
|
612
|
+
# # Do not fail evaluation if log writing fails
|
|
613
|
+
# print(e)
|
|
614
|
+
# pass
|
|
615
|
+
|
|
590
616
|
# Check threshold after logging
|
|
591
617
|
if threshold_of_success is not None and not passed:
|
|
592
618
|
assert (
|
|
@@ -5,6 +5,9 @@ Parameter types
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from typing import Any, Callable, Dict, List, Literal, Optional
|
|
7
7
|
|
|
8
|
+
from eval_protocol.dataset_logger import default_logger
|
|
9
|
+
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
|
|
10
|
+
|
|
8
11
|
from ..models import EvaluationRow, Message
|
|
9
12
|
|
|
10
13
|
ModelParam = str # gpt-4o, gpt-4o-mini, accounts/fireworks/models/llama-3.1-8b-instruct
|
|
@@ -39,10 +42,13 @@ Rollout processor types
|
|
|
39
42
|
class RolloutProcessorConfig:
|
|
40
43
|
model: ModelParam
|
|
41
44
|
input_params: RolloutInputParam # optional input parameters for inference
|
|
42
|
-
mcp_config_path: str
|
|
43
|
-
server_script_path: Optional[str] =
|
|
45
|
+
mcp_config_path: str
|
|
46
|
+
server_script_path: Optional[str] = (
|
|
47
|
+
None # TODO: change from server_script_path to mcp_config_path for agent rollout processor
|
|
48
|
+
)
|
|
44
49
|
max_concurrent_rollouts: int = 8 # maximum number of concurrent rollouts
|
|
45
50
|
steps: int = 30 # max number of rollout steps
|
|
51
|
+
logger: DatasetLogger = default_logger # logger to use during rollout for mid-rollout logs
|
|
46
52
|
|
|
47
53
|
|
|
48
54
|
RolloutProcessor = Callable[[List[EvaluationRow], RolloutProcessorConfig], List[EvaluationRow]]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -40,9 +40,9 @@ eval_protocol.egg-info/requires.txt
|
|
|
40
40
|
eval_protocol.egg-info/top_level.txt
|
|
41
41
|
eval_protocol/../vite-app/dist/index.html
|
|
42
42
|
eval_protocol/../vite-app/dist/assets/favicon-BkAAWQga.png
|
|
43
|
-
eval_protocol/../vite-app/dist/assets/index-
|
|
44
|
-
eval_protocol/../vite-app/dist/assets/index-
|
|
45
|
-
eval_protocol/../vite-app/dist/assets/index-
|
|
43
|
+
eval_protocol/../vite-app/dist/assets/index-DWfIf2rx.css
|
|
44
|
+
eval_protocol/../vite-app/dist/assets/index-D_nkLTVA.js
|
|
45
|
+
eval_protocol/../vite-app/dist/assets/index-D_nkLTVA.js.map
|
|
46
46
|
eval_protocol/../vite-app/dist/assets/logo-light-BprIBJQW.png
|
|
47
47
|
eval_protocol/adapters/__init__.py
|
|
48
48
|
eval_protocol/adapters/braintrust.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/*! tailwindcss v4.1.11 | MIT License | https://tailwindcss.com */@layer properties{@supports (((-webkit-hyphens:none)) and (not (margin-trim:inline))) or ((-moz-orient:inline) and (not (color:rgb(from red r g b)))){*,:before,:after,::backdrop{--tw-rotate-x:initial;--tw-rotate-y:initial;--tw-rotate-z:initial;--tw-skew-x:initial;--tw-skew-y:initial;--tw-space-y-reverse:0;--tw-space-x-reverse:0;--tw-divide-y-reverse:0;--tw-border-style:solid;--tw-font-weight:initial;--tw-shadow:0 0 #0000;--tw-shadow-color:initial;--tw-shadow-alpha:100%;--tw-inset-shadow:0 0 #0000;--tw-inset-shadow-color:initial;--tw-inset-shadow-alpha:100%;--tw-ring-color:initial;--tw-ring-shadow:0 0 #0000;--tw-inset-ring-color:initial;--tw-inset-ring-shadow:0 0 #0000;--tw-ring-inset:initial;--tw-ring-offset-width:0px;--tw-ring-offset-color:#fff;--tw-ring-offset-shadow:0 0 #0000;--tw-outline-style:solid;--tw-blur:initial;--tw-brightness:initial;--tw-contrast:initial;--tw-grayscale:initial;--tw-hue-rotate:initial;--tw-invert:initial;--tw-opacity:initial;--tw-saturate:initial;--tw-sepia:initial;--tw-drop-shadow:initial;--tw-drop-shadow-color:initial;--tw-drop-shadow-alpha:100%;--tw-drop-shadow-size:initial;--tw-duration:initial}}}@layer theme{:root,:host{--font-sans:ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";--font-mono:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--color-red-500:oklch(63.7% .237 25.331);--color-red-700:oklch(50.5% .213 27.518);--color-yellow-50:oklch(98.7% .026 102.212);--color-yellow-100:oklch(97.3% .071 103.193);--color-yellow-200:oklch(94.5% .129 101.54);--color-yellow-500:oklch(79.5% .184 86.047);--color-yellow-700:oklch(55.4% .135 66.442);--color-yellow-800:oklch(47.6% .114 61.907);--color-yellow-900:oklch(42.1% .095 57.708);--color-green-50:oklch(98.2% .018 155.826);--color-green-100:oklch(96.2% .044 156.743);--color-green-200:oklch(92.5% .084 
155.995);--color-green-500:oklch(72.3% .219 149.579);--color-green-700:oklch(52.7% .154 150.069);--color-green-800:oklch(44.8% .119 151.328);--color-green-900:oklch(39.3% .095 152.535);--color-blue-50:oklch(97% .014 254.604);--color-blue-200:oklch(88.2% .059 254.128);--color-blue-500:oklch(62.3% .214 259.815);--color-blue-700:oklch(48.8% .243 264.376);--color-blue-900:oklch(37.9% .146 265.522);--color-gray-50:oklch(98.5% .002 247.839);--color-gray-100:oklch(96.7% .003 264.542);--color-gray-200:oklch(92.8% .006 264.531);--color-gray-300:oklch(87.2% .01 258.338);--color-gray-400:oklch(70.7% .022 261.325);--color-gray-500:oklch(55.1% .027 264.364);--color-gray-600:oklch(44.6% .03 256.802);--color-gray-700:oklch(37.3% .034 259.733);--color-gray-800:oklch(27.8% .033 256.848);--color-gray-900:oklch(21% .034 264.665);--color-white:#fff;--spacing:.25rem;--container-sm:24rem;--container-md:28rem;--container-lg:32rem;--container-7xl:80rem;--text-xs:.75rem;--text-xs--line-height:calc(1/.75);--text-sm:.875rem;--text-sm--line-height:calc(1.25/.875);--font-weight-medium:500;--font-weight-semibold:600;--animate-spin:spin 1s linear infinite;--default-transition-duration:.15s;--default-transition-timing-function:cubic-bezier(.4,0,.2,1);--default-font-family:var(--font-sans);--default-mono-font-family:var(--font-mono)}}@layer base{*,:after,:before,::backdrop{box-sizing:border-box;border:0 solid;margin:0;padding:0}::file-selector-button{box-sizing:border-box;border:0 solid;margin:0;padding:0}html,:host{-webkit-text-size-adjust:100%;tab-size:4;line-height:1.5;font-family:var(--default-font-family,ui-sans-serif,system-ui,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji");font-feature-settings:var(--default-font-feature-settings,normal);font-variation-settings:var(--default-font-variation-settings,normal);-webkit-tap-highlight-color:transparent}hr{height:0;color:inherit;border-top-width:1px}abbr:where([title]){-webkit-text-decoration:underline 
dotted;text-decoration:underline dotted}h1,h2,h3,h4,h5,h6{font-size:inherit;font-weight:inherit}a{color:inherit;-webkit-text-decoration:inherit;text-decoration:inherit}b,strong{font-weight:bolder}code,kbd,samp,pre{font-family:var(--default-mono-font-family,ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace);font-feature-settings:var(--default-mono-font-feature-settings,normal);font-variation-settings:var(--default-mono-font-variation-settings,normal);font-size:1em}small{font-size:80%}sub,sup{vertical-align:baseline;font-size:75%;line-height:0;position:relative}sub{bottom:-.25em}sup{top:-.5em}table{text-indent:0;border-color:inherit;border-collapse:collapse}:-moz-focusring{outline:auto}progress{vertical-align:baseline}summary{display:list-item}ol,ul,menu{list-style:none}img,svg,video,canvas,audio,iframe,embed,object{vertical-align:middle;display:block}img,video{max-width:100%;height:auto}button,input,select,optgroup,textarea{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}::file-selector-button{font:inherit;font-feature-settings:inherit;font-variation-settings:inherit;letter-spacing:inherit;color:inherit;opacity:1;background-color:#0000;border-radius:0}:where(select:is([multiple],[size])) optgroup{font-weight:bolder}:where(select:is([multiple],[size])) optgroup option{padding-inline-start:20px}::file-selector-button{margin-inline-end:4px}::placeholder{opacity:1}@supports (not ((-webkit-appearance:-apple-pay-button))) or (contain-intrinsic-size:1px){::placeholder{color:currentColor}@supports (color:color-mix(in lab,red,red)){::placeholder{color:color-mix(in oklab,currentcolor 
50%,transparent)}}}textarea{resize:vertical}::-webkit-search-decoration{-webkit-appearance:none}::-webkit-date-and-time-value{min-height:1lh;text-align:inherit}::-webkit-datetime-edit{display:inline-flex}::-webkit-datetime-edit-fields-wrapper{padding:0}::-webkit-datetime-edit{padding-block:0}::-webkit-datetime-edit-year-field{padding-block:0}::-webkit-datetime-edit-month-field{padding-block:0}::-webkit-datetime-edit-day-field{padding-block:0}::-webkit-datetime-edit-hour-field{padding-block:0}::-webkit-datetime-edit-minute-field{padding-block:0}::-webkit-datetime-edit-second-field{padding-block:0}::-webkit-datetime-edit-millisecond-field{padding-block:0}::-webkit-datetime-edit-meridiem-field{padding-block:0}:-moz-ui-invalid{box-shadow:none}button,input:where([type=button],[type=reset],[type=submit]){appearance:button}::file-selector-button{appearance:button}::-webkit-inner-spin-button{height:auto}::-webkit-outer-spin-button{height:auto}[hidden]:where(:not([hidden=until-found])){display:none!important}}@layer components;@layer utilities{.collapse{visibility:collapse}.invisible{visibility:hidden}.visible{visibility:visible}.absolute{position:absolute}.fixed{position:fixed}.relative{position:relative}.static{position:static}.sticky{position:sticky}.top-0{top:calc(var(--spacing)*0)}.right-0{right:calc(var(--spacing)*0)}.left-0{left:calc(var(--spacing)*0)}.\!container{width:100%!important}@media (min-width:40rem){.\!container{max-width:40rem!important}}@media (min-width:48rem){.\!container{max-width:48rem!important}}@media (min-width:64rem){.\!container{max-width:64rem!important}}@media (min-width:80rem){.\!container{max-width:80rem!important}}@media (min-width:96rem){.\!container{max-width:96rem!important}}.container{width:100%}@media (min-width:40rem){.container{max-width:40rem}}@media (min-width:48rem){.container{max-width:48rem}}@media (min-width:64rem){.container{max-width:64rem}}@media (min-width:80rem){.container{max-width:80rem}}@media 
(min-width:96rem){.container{max-width:96rem}}.mx-auto{margin-inline:auto}.mt-1{margin-top:calc(var(--spacing)*1)}.mt-2{margin-top:calc(var(--spacing)*2)}.mb-0\.5{margin-bottom:calc(var(--spacing)*.5)}.mb-1{margin-bottom:calc(var(--spacing)*1)}.mb-2{margin-bottom:calc(var(--spacing)*2)}.mb-4{margin-bottom:calc(var(--spacing)*4)}.ml-2{margin-left:calc(var(--spacing)*2)}.block{display:block}.flex{display:flex}.hidden{display:none}.inline-flex{display:inline-flex}.table{display:table}.h-1{height:calc(var(--spacing)*1)}.h-1\.5{height:calc(var(--spacing)*1.5)}.h-3{height:calc(var(--spacing)*3)}.h-4{height:calc(var(--spacing)*4)}.h-6{height:calc(var(--spacing)*6)}.h-8{height:calc(var(--spacing)*8)}.h-10{height:calc(var(--spacing)*10)}.h-12{height:calc(var(--spacing)*12)}.min-h-screen{min-height:100vh}.w-1{width:calc(var(--spacing)*1)}.w-1\.5{width:calc(var(--spacing)*1.5)}.w-3{width:calc(var(--spacing)*3)}.w-4{width:calc(var(--spacing)*4)}.w-8{width:calc(var(--spacing)*8)}.w-12{width:calc(var(--spacing)*12)}.w-\[500px\]{width:500px}.w-auto{width:auto}.w-fit{width:fit-content}.w-full{width:100%}.max-w-7xl{max-width:var(--container-7xl)}.max-w-sm{max-width:var(--container-sm)}.min-w-0{min-width:calc(var(--spacing)*0)}.min-w-max{min-width:max-content}.flex-shrink-0{flex-shrink:0}.shrink{flex-shrink:1}.grow{flex-grow:1}.rotate-90{rotate:90deg}.rotate-180{rotate:180deg}.transform{transform:var(--tw-rotate-x,)var(--tw-rotate-y,)var(--tw-rotate-z,)var(--tw-skew-x,)var(--tw-skew-y,)}.animate-spin{animation:var(--animate-spin)}.cursor-col-resize{cursor:col-resize}.cursor-nw-resize{cursor:nw-resize}.cursor-pointer{cursor:pointer}.cursor-row-resize{cursor:row-resize}.resize{resize:both}.items-center{align-items:center}.justify-between{justify-content:space-between}.justify-center{justify-content:center}.justify-end{justify-content:flex-end}.justify-start{justify-content:flex-start}.gap-1{gap:calc(var(--spacing)*1)}.gap-1\.5{gap:calc(var(--spacing)*1.5)}.gap-2{gap:calc(var(--spacing)
*2)}.gap-3{gap:calc(var(--spacing)*3)}.gap-4{gap:calc(var(--spacing)*4)}:where(.space-y-1>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*1)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*1)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-y-3>:not(:last-child)){--tw-space-y-reverse:0;margin-block-start:calc(calc(var(--spacing)*3)*var(--tw-space-y-reverse));margin-block-end:calc(calc(var(--spacing)*3)*calc(1 - var(--tw-space-y-reverse)))}:where(.space-x-2>:not(:last-child)){--tw-space-x-reverse:0;margin-inline-start:calc(calc(var(--spacing)*2)*var(--tw-space-x-reverse));margin-inline-end:calc(calc(var(--spacing)*2)*calc(1 - var(--tw-space-x-reverse)))}:where(.divide-y>:not(:last-child)){--tw-divide-y-reverse:0;border-bottom-style:var(--tw-border-style);border-top-style:var(--tw-border-style);border-top-width:calc(1px*var(--tw-divide-y-reverse));border-bottom-width:calc(1px*calc(1 - var(--tw-divide-y-reverse)))}:where(.divide-gray-200>:not(:last-child)){border-color:var(--color-gray-200)}.truncate{text-overflow:ellipsis;white-space:nowrap;overflow:hidden}.overflow-hidden{overflow:hidden}.overflow-x-auto{overflow-x:auto}.overflow-y-auto{overflow-y:auto}.rounded{border-radius:.25rem}.rounded-full{border-radius:3.40282e38px}.border{border-style:var(--tw-border-style);border-width:1px}.border-t{border-top-style:var(--tw-border-style);border-top-width:1px}.border-b{border-bottom-style:var(--tw-border-style);border-bottom-width:1px}.border-b-2{border-bottom-style:var(--tw-border-style);border-bottom-width:2px}.border-blue-200{border-color:var(--color-blue-200)}.border-current{border-color:currentColor}.border-gray-200{border-color:var(--color-gray-200)}.border-gray-300{border-color:var(--color-gray-300)}.border-gray-900{border-color:var(--color-gray-900)}.border-green-200{border-color:var(--color-green-200)}.border-transparent{border-color:#0000}.border-yellow-200{border-color:var(--color-yellow-200)}.border-t-t
ransparent{border-top-color:#0000}.bg-blue-50{background-color:var(--color-blue-50)}.bg-blue-500{background-color:var(--color-blue-500)}.bg-gray-50{background-color:var(--color-gray-50)}.bg-gray-100{background-color:var(--color-gray-100)}.bg-gray-300{background-color:var(--color-gray-300)}.bg-gray-500{background-color:var(--color-gray-500)}.bg-green-50{background-color:var(--color-green-50)}.bg-green-100{background-color:var(--color-green-100)}.bg-green-500{background-color:var(--color-green-500)}.bg-red-500{background-color:var(--color-red-500)}.bg-transparent{background-color:#0000}.bg-white{background-color:var(--color-white)}.bg-yellow-50{background-color:var(--color-yellow-50)}.bg-yellow-100{background-color:var(--color-yellow-100)}.bg-yellow-500{background-color:var(--color-yellow-500)}.p-0{padding:calc(var(--spacing)*0)}.p-1{padding:calc(var(--spacing)*1)}.p-2{padding:calc(var(--spacing)*2)}.p-3{padding:calc(var(--spacing)*3)}.p-4{padding:calc(var(--spacing)*4)}.p-8{padding:calc(var(--spacing)*8)}.px-2{padding-inline:calc(var(--spacing)*2)}.px-3{padding-inline:calc(var(--spacing)*3)}.py-0\.5{padding-block:calc(var(--spacing)*.5)}.py-1{padding-block:calc(var(--spacing)*1)}.py-2{padding-block:calc(var(--spacing)*2)}.py-3{padding-block:calc(var(--spacing)*3)}.py-4{padding-block:calc(var(--spacing)*4)}.pt-1{padding-top:calc(var(--spacing)*1)}.pt-2{padding-top:calc(var(--spacing)*2)}.pb-2{padding-bottom:calc(var(--spacing)*2)}.text-center{text-align:center}.text-left{text-align:left}.text-right{text-align:right}.font-mono{font-family:var(--font-mono)}.text-sm{font-size:var(--text-sm);line-height:var(--tw-leading,var(--text-sm--line-height))}.text-xs{font-size:var(--text-xs);line-height:var(--tw-leading,var(--text-xs--line-height))}.font-medium{--tw-font-weight:var(--font-weight-medium);font-weight:var(--font-weight-medium)}.font-semibold{--tw-font-weight:var(--font-weight-semibold);font-weight:var(--font-weight-semibold)}.break-words{overflow-wrap:break-word}.brea
k-all{word-break:break-all}.whitespace-nowrap{white-space:nowrap}.whitespace-pre-wrap{white-space:pre-wrap}.text-blue-700{color:var(--color-blue-700)}.text-blue-900{color:var(--color-blue-900)}.text-gray-400{color:var(--color-gray-400)}.text-gray-500{color:var(--color-gray-500)}.text-gray-600{color:var(--color-gray-600)}.text-gray-700{color:var(--color-gray-700)}.text-gray-800{color:var(--color-gray-800)}.text-gray-900{color:var(--color-gray-900)}.text-green-700{color:var(--color-green-700)}.text-green-800{color:var(--color-green-800)}.text-green-900{color:var(--color-green-900)}.text-red-700{color:var(--color-red-700)}.text-yellow-700{color:var(--color-yellow-700)}.text-yellow-800{color:var(--color-yellow-800)}.text-yellow-900{color:var(--color-yellow-900)}.capitalize{text-transform:capitalize}.lowercase{text-transform:lowercase}.uppercase{text-transform:uppercase}.italic{font-style:italic}.underline{text-decoration-line:underline}.shadow{--tw-shadow:0 1px 3px 0 var(--tw-shadow-color,#0000001a),0 1px 2px -1px 
var(--tw-shadow-color,#0000001a);box-shadow:var(--tw-inset-shadow),var(--tw-inset-ring-shadow),var(--tw-ring-offset-shadow),var(--tw-ring-shadow),var(--tw-shadow)}.outline{outline-style:var(--tw-outline-style);outline-width:1px}.blur{--tw-blur:blur(8px);filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.filter{filter:var(--tw-blur,)var(--tw-brightness,)var(--tw-contrast,)var(--tw-grayscale,)var(--tw-hue-rotate,)var(--tw-invert,)var(--tw-saturate,)var(--tw-sepia,)var(--tw-drop-shadow,)}.transition{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to,opacity,box-shadow,transform,translate,scale,rotate,filter,-webkit-backdrop-filter,backdrop-filter,display,visibility,content-visibility,overlay,pointer-events;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-colors{transition-property:color,background-color,border-color,outline-color,text-decoration-color,fill,stroke,--tw-gradient-from,--tw-gradient-via,--tw-gradient-to;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.transition-transform{transition-property:transform,translate,scale,rotate;transition-timing-function:var(--tw-ease,var(--default-transition-timing-function));transition-duration:var(--tw-duration,var(--default-transition-duration))}.duration-200{--tw-duration:.2s;transition-duration:.2s}.select-none{-webkit-user-select:none;user-select:none}@media 
(hover:hover){.hover\:border-gray-400:hover{border-color:var(--color-gray-400)}.hover\:bg-gray-50:hover{background-color:var(--color-gray-50)}.hover\:bg-gray-100:hover{background-color:var(--color-gray-100)}.hover\:bg-gray-200:hover{background-color:var(--color-gray-200)}.hover\:bg-gray-400:hover{background-color:var(--color-gray-400)}.hover\:text-gray-900:hover{color:var(--color-gray-900)}.hover\:no-underline:hover{text-decoration-line:none}}.focus\:border-gray-500:focus{border-color:var(--color-gray-500)}.focus\:outline-none:focus{--tw-outline-style:none;outline-style:none}@media (min-width:64rem){.lg\:max-w-md{max-width:var(--container-md)}}@media (min-width:80rem){.xl\:max-w-lg{max-width:var(--container-lg)}}}@property --tw-rotate-x{syntax:"*";inherits:false}@property --tw-rotate-y{syntax:"*";inherits:false}@property --tw-rotate-z{syntax:"*";inherits:false}@property --tw-skew-x{syntax:"*";inherits:false}@property --tw-skew-y{syntax:"*";inherits:false}@property --tw-space-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-space-x-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-divide-y-reverse{syntax:"*";inherits:false;initial-value:0}@property --tw-border-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-font-weight{syntax:"*";inherits:false}@property --tw-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-shadow-color{syntax:"*";inherits:false}@property --tw-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-inset-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-inset-shadow-color{syntax:"*";inherits:false}@property --tw-inset-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-ring-color{syntax:"*";inherits:false}@property --tw-ring-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-inset-ring-color{syntax:"*";inherits:false}@property 
--tw-inset-ring-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-ring-inset{syntax:"*";inherits:false}@property --tw-ring-offset-width{syntax:"<length>";inherits:false;initial-value:0}@property --tw-ring-offset-color{syntax:"*";inherits:false;initial-value:#fff}@property --tw-ring-offset-shadow{syntax:"*";inherits:false;initial-value:0 0 #0000}@property --tw-outline-style{syntax:"*";inherits:false;initial-value:solid}@property --tw-blur{syntax:"*";inherits:false}@property --tw-brightness{syntax:"*";inherits:false}@property --tw-contrast{syntax:"*";inherits:false}@property --tw-grayscale{syntax:"*";inherits:false}@property --tw-hue-rotate{syntax:"*";inherits:false}@property --tw-invert{syntax:"*";inherits:false}@property --tw-opacity{syntax:"*";inherits:false}@property --tw-saturate{syntax:"*";inherits:false}@property --tw-sepia{syntax:"*";inherits:false}@property --tw-drop-shadow{syntax:"*";inherits:false}@property --tw-drop-shadow-color{syntax:"*";inherits:false}@property --tw-drop-shadow-alpha{syntax:"<percentage>";inherits:false;initial-value:100%}@property --tw-drop-shadow-size{syntax:"*";inherits:false}@property --tw-duration{syntax:"*";inherits:false}@keyframes spin{to{transform:rotate(360deg)}}
|