eval-protocol 0.2.98.dev1__tar.gz → 0.3.9.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.98.dev1/eval_protocol.egg-info → eval_protocol-0.3.9.dev1}/PKG-INFO +6 -3
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/__init__.py +16 -0
- eval_protocol-0.3.9.dev1/eval_protocol/adapters/dataframe.py +66 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/fireworks_tracing.py +2 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/sql_resource.py +60 -5
- eval_protocol-0.3.9.dev1/eval_protocol/auth.py +106 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_glm_streaming_compliance.py +255 -130
- eval_protocol-0.3.9.dev1/eval_protocol/cli.py +367 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/create_rft.py +135 -180
- eval_protocol-0.3.9.dev1/eval_protocol/cli_commands/export_docs.py +300 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/local_test.py +2 -12
- eval_protocol-0.3.9.dev1/eval_protocol/cli_commands/logs.py +146 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/upload.py +138 -45
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/utils.py +286 -43
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +1 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +23 -4
- eval_protocol-0.3.9.dev1/eval_protocol/evaluation.py +387 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/__init__.py +6 -0
- eval_protocol-0.3.9.dev1/eval_protocol/event_bus/sqlite_event_bus_database.py +255 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/fireworks_rft.py +4 -32
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/tinker_rollout_processor.py +1 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +7 -5
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/manager.py +1 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/models.py +105 -3
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/platform_api.py +66 -119
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/app.py +5 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/redis_utils.py +9 -2
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/__init__.py +13 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_agent_rollout_processor.py +1 -1
- eval_protocol-0.3.9.dev1/eval_protocol/pytest/default_klavis_sandbox_rollout_processor.py +174 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +1 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_single_turn_rollout_process.py +51 -3
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/evaluation_test.py +64 -26
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/evaluation_test_utils.py +31 -4
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/github_action_rollout_processor.py +7 -10
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/integrations/openenv_trl_vllm.py +7 -4
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/openenv_rollout_processor.py +3 -3
- eval_protocol-0.3.9.dev1/eval_protocol/pytest/priority_scheduler.py +515 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/remote_rollout_processor.py +44 -51
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/rollout_processor.py +4 -0
- eval_protocol-0.3.9.dev1/eval_protocol/training/__init__.py +45 -0
- eval_protocol-0.3.9.dev1/eval_protocol/training/gepa_trainer.py +522 -0
- eval_protocol-0.3.9.dev1/eval_protocol/training/gepa_utils.py +489 -0
- eval_protocol-0.3.9.dev1/eval_protocol/training/trainer.py +19 -0
- eval_protocol-0.3.9.dev1/eval_protocol/training/utils.py +19 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1/eval_protocol.egg-info}/PKG-INFO +6 -3
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/SOURCES.txt +11 -7
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/requires.txt +6 -2
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/pyproject.toml +9 -5
- eval_protocol-0.3.9.dev1/tests/test_auth.py +73 -0
- eval_protocol-0.3.9.dev1/tests/test_cli_args.py +43 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cli_create_rft.py +291 -224
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_ep_upload_e2e.py +197 -153
- eval_protocol-0.3.9.dev1/tests/test_evaluation.py +118 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_examples_end_to_end.py +1 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_minimal.py +2 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_models.py +32 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_priority_scheduler.py +18 -10
- eval_protocol-0.3.9.dev1/tests/test_rollout_logprobs.py +58 -0
- eval_protocol-0.3.9.dev1/tests/test_sqlite_hardening.py +474 -0
- eval_protocol-0.3.9.dev1/tests/test_training_utils.py +32 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/index-CuQbfdPD.js +1 -1
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/index-CuQbfdPD.js.map +1 -1
- eval_protocol-0.2.98.dev1/eval_protocol/auth.py +0 -331
- eval_protocol-0.2.98.dev1/eval_protocol/cli.py +0 -714
- eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/deploy.py +0 -509
- eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/deploy_mcp.py +0 -290
- eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/logs.py +0 -57
- eval_protocol-0.2.98.dev1/eval_protocol/cli_commands/preview.py +0 -186
- eval_protocol-0.2.98.dev1/eval_protocol/evaluation.py +0 -1471
- eval_protocol-0.2.98.dev1/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -93
- eval_protocol-0.2.98.dev1/eval_protocol/pytest/priority_scheduler.py +0 -348
- eval_protocol-0.2.98.dev1/tests/test_auth.py +0 -396
- eval_protocol-0.2.98.dev1/tests/test_cli.py +0 -170
- eval_protocol-0.2.98.dev1/tests/test_cli_args.py +0 -156
- eval_protocol-0.2.98.dev1/tests/test_deploy_integration.py +0 -214
- eval_protocol-0.2.98.dev1/tests/test_evaluation.py +0 -431
- eval_protocol-0.2.98.dev1/tests/test_evaluation_integration.py +0 -365
- eval_protocol-0.2.98.dev1/tests/test_evaluation_preview_integration.py +0 -470
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/LICENSE +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/README.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/jsonl_data_loader.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/exceptions.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/openai_rft.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/tinker_cookbook.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/execution/vllm_policy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/litellm.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/buffer.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/rollout_result_post_processor.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/tracing_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/svg_agent/evaluator/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/setup.cfg +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/setup.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cli_local_test.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_exception_config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_exceptions.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_format.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_length.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_litellm_policy_provider_fields.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_math.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_message_field_filtering.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_openai_rft_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/versioneer.py +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/index-iZp_HgyW.css +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.98.dev1
|
|
3
|
+
Version: 0.3.9.dev1
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -29,6 +29,7 @@ Requires-Dist: pytest>=6.0.0
|
|
|
29
29
|
Requires-Dist: pytest-asyncio>=0.21.0
|
|
30
30
|
Requires-Dist: peewee>=3.18.2
|
|
31
31
|
Requires-Dist: backoff>=2.2.0
|
|
32
|
+
Requires-Dist: fireworks-ai==1.0.0a20
|
|
32
33
|
Requires-Dist: questionary>=2.0.0
|
|
33
34
|
Requires-Dist: toml>=0.10.0
|
|
34
35
|
Requires-Dist: loguru>=0.6.0
|
|
@@ -72,8 +73,6 @@ Requires-Dist: transformers>=4.0.0; extra == "trl"
|
|
|
72
73
|
Requires-Dist: accelerate>=0.28.0; extra == "trl"
|
|
73
74
|
Provides-Extra: openevals
|
|
74
75
|
Requires-Dist: openevals>=0.1.0; extra == "openevals"
|
|
75
|
-
Provides-Extra: fireworks
|
|
76
|
-
Requires-Dist: fireworks-ai>=0.19.19; extra == "fireworks"
|
|
77
76
|
Provides-Extra: box2d
|
|
78
77
|
Requires-Dist: swig; extra == "box2d"
|
|
79
78
|
Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
|
|
@@ -102,6 +101,10 @@ Provides-Extra: braintrust
|
|
|
102
101
|
Requires-Dist: braintrust[otel]; extra == "braintrust"
|
|
103
102
|
Provides-Extra: openenv
|
|
104
103
|
Requires-Dist: openenv-core; extra == "openenv"
|
|
104
|
+
Provides-Extra: dspy
|
|
105
|
+
Requires-Dist: dspy>=3.0.0; extra == "dspy"
|
|
106
|
+
Provides-Extra: klavis
|
|
107
|
+
Requires-Dist: klavis>=2.18.0; extra == "klavis"
|
|
105
108
|
Provides-Extra: langgraph
|
|
106
109
|
Requires-Dist: langgraph>=0.6.7; extra == "langgraph"
|
|
107
110
|
Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "
|
|
11
|
+
"date": "2026-01-08T13:29:17-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.
|
|
14
|
+
"full-revisionid": "764ac4f132c35fe01c354b4150cbc19c7eedea12",
|
|
15
|
+
"version": "0.3.9.dev.1"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -99,3 +99,19 @@ try:
|
|
|
99
99
|
__all__.extend(["WeaveAdapter"])
|
|
100
100
|
except ImportError:
|
|
101
101
|
pass
|
|
102
|
+
|
|
103
|
+
# DataFrame adapter (pandas integration for Lilac, etc.)
|
|
104
|
+
try:
|
|
105
|
+
from .dataframe import (
|
|
106
|
+
evaluation_rows_to_dataframe,
|
|
107
|
+
dataframe_to_evaluation_rows,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
__all__.extend(
|
|
111
|
+
[
|
|
112
|
+
"evaluation_rows_to_dataframe",
|
|
113
|
+
"dataframe_to_evaluation_rows",
|
|
114
|
+
]
|
|
115
|
+
)
|
|
116
|
+
except ImportError:
|
|
117
|
+
pass
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pandas DataFrame adapter for Eval Protocol.
|
|
3
|
+
|
|
4
|
+
This module provides utilities for converting between EvaluationRow format
|
|
5
|
+
and pandas DataFrame format, enabling integration with data curation tools
|
|
6
|
+
such as Lilac, Great Expectations, or any pandas-based workflow.
|
|
7
|
+
|
|
8
|
+
Example usage:
|
|
9
|
+
>>> from eval_protocol.adapters.dataframe import (
|
|
10
|
+
... evaluation_rows_to_dataframe,
|
|
11
|
+
... dataframe_to_evaluation_rows,
|
|
12
|
+
... )
|
|
13
|
+
>>>
|
|
14
|
+
>>> # Convert EvaluationRows to DataFrame
|
|
15
|
+
>>> df = evaluation_rows_to_dataframe(rows)
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Convert back to EvaluationRows
|
|
18
|
+
>>> rows = dataframe_to_evaluation_rows(df)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
|
|
25
|
+
import pandas as pd
|
|
26
|
+
|
|
27
|
+
from ..models import EvaluationRow
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
    """Build a pandas DataFrame holding one record per EvaluationRow.

    Every row is serialized with its own ``to_dict()`` method and the
    resulting dictionaries are handed to ``pd.DataFrame`` unchanged, so the
    DataFrame's columns are exactly the keys that ``to_dict()`` emits.

    Args:
        rows: EvaluationRow objects to serialize.

    Returns:
        A DataFrame with one record per input row, in input order.
    """
    return pd.DataFrame([evaluation_row.to_dict() for evaluation_row in rows])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def dataframe_to_evaluation_rows(df: pd.DataFrame) -> list[EvaluationRow]:
    """Convert a pandas DataFrame back to EvaluationRows.

    Each DataFrame row is turned into a plain dict and passed to
    ``EvaluationRow.from_dict()``. Conversion is best-effort: a row that
    cannot be deserialized is logged at WARNING level and skipped, so the
    returned list may be shorter than the DataFrame.

    Args:
        df: DataFrame whose columns are the keys ``EvaluationRow.from_dict()``
            expects (presumably a frame produced by
            ``evaluation_rows_to_dataframe`` -- confirm against the model).

    Returns:
        List of the EvaluationRow objects that converted successfully, in
        DataFrame order.
    """
    # to_dict(orient="records") keeps every column's own dtype. iterrows()
    # would first pack each row into a single Series, which upcasts mixed
    # numeric columns (e.g. int -> float) before the values reach from_dict().
    records = df.to_dict(orient="records")

    rows = []
    for index_label, record in zip(df.index, records):
        try:
            rows.append(EvaluationRow.from_dict(record))
        except Exception as e:
            # Deliberate best-effort: report which row was dropped and move on.
            logger.warning("Failed to convert row %s: %s", index_label, e)
    return rows
|
{eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/adapters/fireworks_tracing.py
RENAMED
|
@@ -268,7 +268,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
268
268
|
def search_logs(self, tags: List[str], limit: int = 100, hours_back: int = 24) -> List[Dict[str, Any]]:
|
|
269
269
|
"""Fetch logs from Fireworks tracing gateway /logs endpoint.
|
|
270
270
|
|
|
271
|
-
Returns entries with keys: timestamp, message, severity, tags.
|
|
271
|
+
Returns entries with keys: timestamp, message, severity, tags, status, extras.
|
|
272
272
|
"""
|
|
273
273
|
if not tags:
|
|
274
274
|
raise ValueError("At least one tag is required to fetch logs")
|
|
@@ -315,6 +315,7 @@ class FireworksTracingAdapter(BaseAdapter):
|
|
|
315
315
|
"severity": e.get("severity", "INFO"),
|
|
316
316
|
"tags": e.get("tags", []),
|
|
317
317
|
"status": e.get("status"),
|
|
318
|
+
"extras": e.get("extras"),
|
|
318
319
|
}
|
|
319
320
|
)
|
|
320
321
|
return results
|
{eval_protocol-0.2.98.dev1 → eval_protocol-0.3.9.dev1}/eval_protocol/agent/resources/sql_resource.py
RENAMED
|
@@ -12,6 +12,50 @@ from typing import Any, Dict, List, Optional
|
|
|
12
12
|
from ..resource_abc import ForkableResource
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
# SQLite connection settings for hardened concurrency safety
SQLITE_CONNECTION_TIMEOUT = 30  # 30 seconds


def _apply_hardened_pragmas(conn: sqlite3.Connection) -> None:
    """Apply hardened SQLite pragmas for concurrency safety.

    Args:
        conn: An open SQLite connection; the pragmas are executed on it
            directly.
    """
    conn.execute("PRAGMA journal_mode=WAL")  # Write-Ahead Logging
    conn.execute("PRAGMA synchronous=NORMAL")  # Balance safety and performance
    # busy_timeout is in milliseconds; derive it from the shared constant so
    # the pragma and the connect() timeout cannot drift apart.
    conn.execute(f"PRAGMA busy_timeout={int(SQLITE_CONNECTION_TIMEOUT * 1000)}")
    conn.execute("PRAGMA wal_autocheckpoint=1000")  # Checkpoint every 1000 pages
    conn.execute("PRAGMA cache_size=-64000")  # 64MB cache
    conn.execute("PRAGMA foreign_keys=ON")  # Enable foreign key constraints
    conn.execute("PRAGMA temp_store=MEMORY")  # Store temp tables in memory


def _checkpoint_and_copy_database(
    source_path: Path, dest_path: Path, timeout: int = SQLITE_CONNECTION_TIMEOUT
) -> None:
    """
    Safely copy a SQLite database by checkpointing WAL first.

    In WAL mode, data may exist in the -wal file that hasn't been written
    to the main database file. This function performs a TRUNCATE checkpoint
    to flush all WAL data to the main file before copying, ensuring a
    complete and consistent copy.

    Args:
        source_path: Path to the source database file.
        dest_path: Path where the copy should be created.
        timeout: Connection timeout in seconds.

    Raises:
        FileNotFoundError: If ``source_path`` is not an existing file.
    """
    source_path = Path(source_path)
    # sqlite3.connect() creates a missing file on first use, which would turn
    # a wrong path into a silently copied empty database. Fail loudly instead.
    if not source_path.is_file():
        raise FileNotFoundError(f"SQLite database to copy does not exist: {source_path}")

    # First, checkpoint the WAL to ensure all data is in the main file
    conn = sqlite3.connect(str(source_path), timeout=timeout)
    try:
        # TRUNCATE mode: checkpoint and truncate the WAL file to zero bytes
        # NOTE(review): the pragma's result row (busy flag) is not inspected,
        # so a checkpoint blocked by another connection goes unnoticed.
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
    finally:
        conn.close()

    # Now safely copy just the main database file
    shutil.copyfile(str(source_path), str(dest_path))
|
|
57
|
+
|
|
58
|
+
|
|
15
59
|
class SQLResource(ForkableResource):
|
|
16
60
|
"""
|
|
17
61
|
A ForkableResource for managing SQL database states, primarily SQLite.
|
|
@@ -20,6 +64,8 @@ class SQLResource(ForkableResource):
|
|
|
20
64
|
and seed data, forked (by copying the DB file), checkpointed (by copying),
|
|
21
65
|
and restored.
|
|
22
66
|
|
|
67
|
+
Uses hardened SQLite settings for concurrency safety.
|
|
68
|
+
|
|
23
69
|
Attributes:
|
|
24
70
|
_config (Dict[str, Any]): Configuration for the resource.
|
|
25
71
|
_db_path (Optional[Path]): Path to the current SQLite database file.
|
|
@@ -38,8 +84,14 @@ class SQLResource(ForkableResource):
|
|
|
38
84
|
def _get_db_connection(self) -> sqlite3.Connection:
    """Open a new SQLite connection to the current database file.

    The connection is opened with the module-wide timeout and DEFERRED
    isolation, then configured by ``_apply_hardened_pragmas``.

    Returns:
        A configured ``sqlite3.Connection``; the caller is responsible for
        closing it.

    Raises:
        ConnectionError: If no database path has been set yet.
    """
    if not self._db_path:
        raise ConnectionError("Database path not set. Call setup() or fork() first.")
    # Set timeout to prevent indefinite hangs with hardened settings
    conn = sqlite3.connect(
        str(self._db_path),
        timeout=SQLITE_CONNECTION_TIMEOUT,
        isolation_level="DEFERRED",  # Better for concurrent access
    )
    try:
        _apply_hardened_pragmas(conn)
    except Exception:
        # Do not leak the open handle when a pragma fails; re-raise unchanged.
        conn.close()
        raise
    return conn
|
|
43
95
|
|
|
44
96
|
async def setup(self, config: Dict[str, Any]) -> None:
|
|
45
97
|
"""
|
|
@@ -111,7 +163,8 @@ class SQLResource(ForkableResource):
|
|
|
111
163
|
forked_db_name = f"fork_{uuid.uuid4().hex}.sqlite"
|
|
112
164
|
forked_resource._db_path = self._temp_dir / forked_db_name
|
|
113
165
|
|
|
114
|
-
|
|
166
|
+
# Use checkpoint-and-copy to ensure WAL data is flushed before copying
|
|
167
|
+
_checkpoint_and_copy_database(self._db_path, forked_resource._db_path)
|
|
115
168
|
return forked_resource
|
|
116
169
|
|
|
117
170
|
async def checkpoint(self) -> Dict[str, Any]:
|
|
@@ -125,7 +178,8 @@ class SQLResource(ForkableResource):
|
|
|
125
178
|
|
|
126
179
|
checkpoint_name = f"checkpoint_{self._db_path.stem}_{uuid.uuid4().hex}.sqlite"
|
|
127
180
|
checkpoint_path = self._temp_dir / checkpoint_name
|
|
128
|
-
|
|
181
|
+
# Use checkpoint-and-copy to ensure WAL data is flushed before copying
|
|
182
|
+
_checkpoint_and_copy_database(self._db_path, checkpoint_path)
|
|
129
183
|
return {"db_type": "sqlite", "checkpoint_path": str(checkpoint_path)}
|
|
130
184
|
|
|
131
185
|
async def restore(self, state_data: Dict[str, Any]) -> None:
|
|
@@ -147,7 +201,8 @@ class SQLResource(ForkableResource):
|
|
|
147
201
|
if not self._db_path:
|
|
148
202
|
self._db_path = self._temp_dir / f"restored_{uuid.uuid4().hex}.sqlite"
|
|
149
203
|
|
|
150
|
-
|
|
204
|
+
# Use checkpoint-and-copy to ensure WAL data is flushed before copying
|
|
205
|
+
_checkpoint_and_copy_database(checkpoint_path, self._db_path)
|
|
151
206
|
self._base_db_path = self._db_path # The restored state becomes the new base for future forks
|
|
152
207
|
|
|
153
208
|
async def step(self, action_name: str, action_params: Dict[str, Any]) -> Any:
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_fireworks_api_key() -> Optional[str]:
    """
    Look up the Fireworks API key in the FIREWORKS_API_KEY environment variable.

    Returns:
        The key with surrounding whitespace removed, or None when the
        variable is unset, empty, or whitespace-only.
    """
    raw_value = os.environ.get("FIREWORKS_API_KEY")
    # Guard clause: an unset, empty or blank value all count as "no key".
    if not raw_value or not raw_value.strip():
        logger.debug("Fireworks API key not found in environment variables.")
        return None
    logger.debug("Using FIREWORKS_API_KEY from environment variable.")
    return raw_value.strip()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_fireworks_account_id() -> Optional[str]:
    """
    Resolve the Fireworks Account ID from the configured API key.

    The id is not read from configuration; it is obtained by verifying the
    API key via verify_api_key_and_get_account_id(). Any exception raised
    along the way is logged at debug level and treated as "not found".

    Returns:
        The Account ID if it could be resolved, otherwise None.
    """
    # Account id is derived from the API key (single source of truth).
    try:
        key = get_fireworks_api_key()
        account_id = (
            verify_api_key_and_get_account_id(api_key=key, api_base=get_fireworks_api_base())
            if key
            else None
        )
        if account_id:
            logger.debug("Resolved account id via verifyApiKey: %s", account_id)
            return account_id
    except Exception as e:
        logger.debug("Failed to resolve account id via verifyApiKey: %s", e)

    logger.debug("Fireworks Account ID not found via verifyApiKey.")
    return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_fireworks_api_base() -> str:
    """
    Retrieves the Fireworks API base URL.

    The base URL is sourced from the FIREWORKS_API_BASE environment variable.
    If the variable is unset, empty, or whitespace-only, it defaults to
    "https://api.fireworks.ai".

    Returns:
        The API base URL, with surrounding whitespace removed.
    """
    default_base = "https://api.fireworks.ai"
    # Read the variable once. A blank value is treated as unset (the same rule
    # get_fireworks_api_key applies) so callers never get an empty base URL.
    configured = os.environ.get("FIREWORKS_API_BASE", "").strip()
    if configured:
        logger.debug("Using FIREWORKS_API_BASE from environment variable.")
        return configured
    logger.debug("FIREWORKS_API_BASE not set in environment, defaulting to %s.", default_base)
    return default_base
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def verify_api_key_and_get_account_id(
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
) -> Optional[str]:
    """
    Validate an API key against the Fireworks verifyApiKey endpoint and read the
    account id from the response headers.

    The call is best-effort: any exception (including a failed import or a
    network error) is logged at debug level and reported as None.

    Args:
        api_key: Optional explicit API key. When None, resolves via get_fireworks_api_key().
        api_base: Optional explicit API base. When None, resolves via get_fireworks_api_base().

    Returns:
        The account id, stripped of surrounding whitespace, when the endpoint
        answers 200 and the account-id header is non-blank; otherwise None.
    """
    try:
        key = api_key or get_fireworks_api_key()
        if not key:
            return None
        base = api_base or get_fireworks_api_base()

        # Imported inside the try so an import failure is also reported as None.
        from .common_utils import get_user_agent

        endpoint = f"{base.rstrip('/')}/verifyApiKey"
        request_headers = {
            "Authorization": f"Bearer {key}",
            "User-Agent": get_user_agent(),
        }
        response = requests.get(endpoint, headers=request_headers, timeout=10)
        if response.status_code != 200:
            logger.debug("verifyApiKey returned status %s", response.status_code)
            return None

        # Header keys could vary in case; requests provides case-insensitive dict
        response_headers = response.headers
        account_id = response_headers.get("x-fireworks-account-id") or response_headers.get(
            "X-Fireworks-Account-Id"
        )
        if not account_id or not account_id.strip():
            return None
        logger.debug("Resolved account id via verifyApiKey: %s", account_id)
        return account_id.strip()
    except Exception as e:
        logger.debug("Failed to verify API key for account id resolution: %s", e)
        return None
|