eval-protocol 0.2.6__tar.gz → 0.2.6.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.6/eval_protocol.egg-info → eval_protocol-0.2.6.dev2}/PKG-INFO +3 -3
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/__init__.py +4 -3
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/_version.py +3 -3
- eval_protocol-0.2.6.dev2/eval_protocol/common_utils.py +55 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/__init__.py +3 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/dataset_logger/dataset_logger.py +2 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +98 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +39 -0
- eval_protocol-0.2.6.dev2/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +57 -0
- eval_protocol-0.2.6.dev2/eval_protocol/directory_utils.py +55 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/__init__.py +5 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/event_bus.py +50 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/logger.py +3 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/sqlite_event_bus.py +109 -0
- eval_protocol-0.2.6.dev2/eval_protocol/event_bus/sqlite_event_bus_database.py +95 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/generation/clients.py +4 -1
- eval_protocol-0.2.6.dev2/eval_protocol/get_pep440_version.py +133 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/human_id/__init__.py +3 -2
- eval_protocol-0.2.6.dev2/eval_protocol/logging_utils.py +175 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/client/connection.py +16 -49
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/base_policy.py +1 -1
- eval_protocol-0.2.6.dev2/eval_protocol/mcp/execution/manager.py +562 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/mcpgym.py +67 -102
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/session/manager.py +4 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_env.py +35 -16
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/models.py +54 -4
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +14 -1
- eval_protocol-0.2.6.dev2/eval_protocol/pytest/default_single_turn_rollout_process.py +96 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/evaluation_test.py +316 -51
- eval_protocol-0.2.6.dev2/eval_protocol/pytest/plugin.py +144 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/utils.py +32 -2
- eval_protocol-0.2.6.dev2/eval_protocol/stats/__init__.py +5 -0
- eval_protocol-0.2.6.dev2/eval_protocol/stats/confidence_intervals.py +116 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/types/types.py +4 -0
- eval_protocol-0.2.6.dev2/eval_protocol/utils/logs_server.py +338 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2/eval_protocol.egg-info}/PKG-INFO +3 -3
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/SOURCES.txt +17 -3
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/entry_points.txt +4 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/requires.txt +2 -2
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/pyproject.toml +15 -2
- eval_protocol-0.2.6.dev2/tests/test_event_bus.py +265 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_parallel_rollouts.py +2 -2
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_rollout_control_plane_integration.py +1 -1
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_url_handling.py +8 -26
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/agent/llm_agent.py +22 -36
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/user/user_simulator.py +9 -5
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/llm_utils.py +18 -3
- eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-D9iVTBbF.css +1 -0
- eval_protocol-0.2.6/vite-app/dist/assets/index-CRkZ6JGL.js → eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-DiF_B1x_.js +19 -19
- eval_protocol-0.2.6/vite-app/dist/assets/index-CRkZ6JGL.js.map → eval_protocol-0.2.6.dev2/vite-app/dist/assets/index-DiF_B1x_.js.map +1 -1
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vite-app/dist/index.html +3 -3
- eval_protocol-0.2.6/eval_protocol/common_utils.py +0 -36
- eval_protocol-0.2.6/eval_protocol/dataset_logger/__init__.py +0 -3
- eval_protocol-0.2.6/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -114
- eval_protocol-0.2.6/eval_protocol/mcp/execution/manager.py +0 -518
- eval_protocol-0.2.6/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -50
- eval_protocol-0.2.6/eval_protocol/utils/logs_server.py +0 -295
- eval_protocol-0.2.6/vite-app/dist/assets/index-BySN1scz.css +0 -1
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/LICENSE +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/README.md +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/development/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/http_rollout_protocol.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/http_rollout_resource.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/braintrust.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/intermediary_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/remote_http_client.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/mcp_agent/session.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/default_no_op_rollout_process.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/setup.cfg +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/setup.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_braintrust_adapter.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_braintrust_example.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_config.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_format.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_frozen_lake_http_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_frozen_lake_seed_evaluation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_length.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_math.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_models.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/versioneer.py +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.6 → eval_protocol-0.2.6.dev2}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.2.6.dev2
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -38,13 +38,13 @@ Requires-Dist: litellm>=1.0.0
|
|
|
38
38
|
Requires-Dist: addict>=2.4.0
|
|
39
39
|
Requires-Dist: deepdiff>=6.0.0
|
|
40
40
|
Requires-Dist: pandas>=1.5.0
|
|
41
|
-
Requires-Dist: watchdog>=2.1.0
|
|
42
41
|
Requires-Dist: websockets>=15.0.1
|
|
43
42
|
Requires-Dist: fastapi>=0.116.1
|
|
43
|
+
Requires-Dist: pytest>=6.0.0
|
|
44
|
+
Requires-Dist: peewee>=3.18.2
|
|
44
45
|
Provides-Extra: dev
|
|
45
46
|
Requires-Dist: build; extra == "dev"
|
|
46
47
|
Requires-Dist: twine; extra == "dev"
|
|
47
|
-
Requires-Dist: pytest>=6.0.0; extra == "dev"
|
|
48
48
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
49
49
|
Requires-Dist: pytest-httpserver; extra == "dev"
|
|
50
50
|
Requires-Dist: werkzeug>=2.0.0; extra == "dev"
|
|
@@ -10,15 +10,16 @@ tool-augmented models using self-contained task bundles.
|
|
|
10
10
|
|
|
11
11
|
import warnings
|
|
12
12
|
|
|
13
|
-
from .adapters.braintrust import reward_fn_to_scorer, scorer_to_reward_fn
|
|
13
|
+
from eval_protocol.adapters.braintrust import reward_fn_to_scorer, scorer_to_reward_fn
|
|
14
|
+
|
|
14
15
|
from .auth import get_fireworks_account_id, get_fireworks_api_key
|
|
15
16
|
from .common_utils import load_jsonl
|
|
16
17
|
from .config import RewardKitConfig, get_config, load_config
|
|
17
18
|
from .mcp_env import (
|
|
18
19
|
AnthropicPolicy,
|
|
19
|
-
OpenAIPolicy,
|
|
20
|
-
LiteLLMPolicy,
|
|
21
20
|
FireworksPolicy,
|
|
21
|
+
LiteLLMPolicy,
|
|
22
|
+
OpenAIPolicy,
|
|
22
23
|
make,
|
|
23
24
|
rollout,
|
|
24
25
|
test_mcp,
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-08-
|
|
11
|
+
"date": "2025-08-10T19:39:17-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.6"
|
|
14
|
+
"full-revisionid": "a50c3f62fdb5be7347741446338d8c1771e92547",
|
|
15
|
+
"version": "0.2.6-dev2"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _extract_row_id(text):
    """Return the raw ``row_id`` value found in *text*, or ``None`` if absent.

    A regex is used instead of a JSON parser because this is only called on
    lines that already failed to parse. A quoted value is returned with its
    surrounding quotes.
    """
    match = re.search(r'"row_id"\s*:\s*("(?:[^"\\]|\\.)*"|[^,}\s]+)', text)
    return match.group(1) if match else None


def _parse_jsonl_lines(lines, source: str) -> List[Dict[str, Any]]:
    """Parse an iterable of JSONL lines, skipping blank lines.

    Args:
        lines: Iterable yielding one line of text per item (``None`` items are skipped).
        source: Human-readable description of the origin, used in the error printout.

    Raises:
        ValueError: If a line is invalid JSON and mentions ``row_id``.
        json.JSONDecodeError: If a line is invalid JSON and has no ``row_id``.
    """
    data: List[Dict[str, Any]] = []
    for line_number, raw in enumerate(lines, start=1):
        if raw is None:
            continue
        # Skip blank or whitespace-only lines to be robust to trailing newlines.
        stripped = raw.strip()
        if not stripped:
            continue
        try:
            data.append(json.loads(stripped))
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON line for {source} at line {line_number}")
            if "row_id" in stripped:
                # Surface the offending row id so the bad record is easy to locate.
                row_id = _extract_row_id(stripped)
                raise ValueError(f"{e.msg} at line {line_number}: {stripped} ({row_id})") from e
            raise
    return data


def load_jsonl(file_path: str) -> List[Dict[str, Any]]:
    """
    Reads a JSONL file where each line is a valid JSON object and returns a list of these objects.

    Supports HTTP(S) URLs and local file paths. Blank lines are ignored.

    Args:
        file_path: Path to the JSONL file, or an ``http://`` / ``https://`` URL.

    Returns:
        A list of parsed JSON values, one per non-blank line, in file order.

    Raises:
        ValueError: If a line is not valid JSON and contains ``row_id``; the message
            includes the line number, the line and the extracted row id.
        json.JSONDecodeError: If a line is not valid JSON and has no ``row_id``.
        OSError: If a local file cannot be opened (e.g. it does not exist).

    For URLs, errors raised by ``requests`` (including ``raise_for_status``) propagate unchanged.
    """
    if file_path.startswith(("http://", "https://")):
        # Context manager releases the streamed connection even when parsing fails.
        with requests.get(file_path, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            # Without an encoding, iter_lines(decode_unicode=True) yields bytes.
            if resp.encoding is None:
                resp.encoding = "utf-8"
            return _parse_jsonl_lines(resp.iter_lines(decode_unicode=True), f"URL {file_path}")

    with open(file_path, "r", encoding="utf-8") as f:
        return _parse_jsonl_lines(f, f"file {file_path}")
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, List, Optional
|
|
7
|
+
|
|
8
|
+
from eval_protocol.common_utils import load_jsonl
|
|
9
|
+
from eval_protocol.dataset_logger.dataset_logger import DatasetLogger
|
|
10
|
+
from eval_protocol.dataset_logger.directory_utils import find_eval_protocol_datasets_dir
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from eval_protocol.models import EvaluationRow
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class LocalFSDatasetLoggerAdapter(DatasetLogger):
    """
    Logger that stores evaluation rows as JSONL files on the local filesystem.

    New rows are appended to ``<datasets_dir>/<UTC date>.jsonl``; a row whose ID
    already exists in any ``.jsonl`` file of the datasets directory is rewritten
    in place.

    NOTE: this class performs no file locking, so concurrent writers to the same
    directory can overwrite each other's updates.
    """

    def __init__(self):
        self.datasets_dir = find_eval_protocol_datasets_dir()
        self.log_dir = os.path.dirname(self.datasets_dir)

        # Ensure that the log file exists. Append mode creates it when missing
        # and never truncates a file another writer has just created.
        with open(self.current_jsonl_path, "a", encoding="utf-8"):
            pass

    @property
    def current_date(self) -> str:
        """Today's date as ``YYYY-MM-DD``."""
        # Use UTC timezone to be consistent across local device/locations/CI
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")

    @property
    def current_jsonl_path(self) -> str:
        """
        The current JSONL file path. Based on the current date.
        """
        return os.path.join(self.datasets_dir, f"{self.current_date}.jsonl")

    def log(self, row: "EvaluationRow") -> None:
        """Log a row, updating existing row with same ID or appending new row."""
        row_id = row.input_metadata.row_id
        # Files are opened in text mode, so "\n" is the right terminator;
        # os.linesep would be translated a second time on Windows.
        serialized = row.model_dump_json(exclude_none=True) + "\n"

        # Check if row with this ID already exists in any JSONL file
        if os.path.exists(self.datasets_dir):
            for filename in os.listdir(self.datasets_dir):
                if not filename.endswith(".jsonl"):
                    continue
                file_path = os.path.join(self.datasets_dir, filename)
                if not os.path.exists(file_path):
                    continue

                # UTF-8 matches what load_jsonl uses when these files are read back.
                with open(file_path, "r", encoding="utf-8") as f:
                    lines = f.readlines()

                for i, line in enumerate(lines):
                    try:
                        existing_id = json.loads(line.strip())["input_metadata"]["row_id"]
                    except (json.JSONDecodeError, KeyError, TypeError):
                        # Blank, malformed or foreign-shaped lines cannot match; leave them untouched.
                        continue
                    if existing_id == row_id:
                        # Update existing row
                        lines[i] = serialized
                        with open(file_path, "w", encoding="utf-8") as f:
                            f.writelines(lines)
                        return

        # If no existing row found, append new row to current file
        with open(self.current_jsonl_path, "a", encoding="utf-8") as f:
            f.write(serialized)

    def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]:
        """Read rows from all JSONL files in the datasets directory. Also
        ensures that there are no duplicate row IDs.

        Args:
            row_id: When given (and truthy), only rows with this ID are returned.

        Raises:
            ValueError: If the same row ID appears more than once across the files.
        """
        from eval_protocol.models import EvaluationRow

        if not os.path.exists(self.datasets_dir):
            return []

        all_rows = []
        existing_row_ids = set()
        for filename in os.listdir(self.datasets_dir):
            if not filename.endswith(".jsonl"):
                continue
            file_path = os.path.join(self.datasets_dir, filename)
            for r in load_jsonl(file_path):
                row = EvaluationRow(**r)
                if row.input_metadata.row_id in existing_row_ids:
                    raise ValueError(f"Duplicate Row ID {row.input_metadata.row_id} already exists")
                existing_row_ids.add(row.input_metadata.row_id)
                all_rows.append(row)

        if row_id:
            # Filter by row_id if specified
            return [row for row in all_rows if getattr(row.input_metadata, "row_id", None) == row_id]
        return all_rows
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from eval_protocol.dataset_logger.dataset_logger import LOG_EVENT_TYPE, DatasetLogger
|
|
5
|
+
from eval_protocol.dataset_logger.sqlite_evaluation_row_store import SqliteEvaluationRowStore
|
|
6
|
+
from eval_protocol.directory_utils import find_eval_protocol_dir
|
|
7
|
+
from eval_protocol.event_bus import event_bus
|
|
8
|
+
from eval_protocol.event_bus.logger import logger
|
|
9
|
+
from eval_protocol.models import EvaluationRow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SqliteDatasetLoggerAdapter(DatasetLogger):
    """Dataset logger that persists rows through a ``SqliteEvaluationRowStore``
    and emits a ``LOG_EVENT_TYPE`` event on the shared event bus after each write."""

    def __init__(self, db_path: Optional[str] = None, store: Optional[SqliteEvaluationRowStore] = None):
        """Bind to an existing store, or open one at ``db_path``.

        Args:
            db_path: SQLite file to use; defaults to ``logs.db`` inside the
                directory returned by ``find_eval_protocol_dir``.
            store: Pre-built store to reuse instead of opening a new one.

        Raises:
            ValueError: If both ``db_path`` and ``store`` are supplied.
        """
        # The directory lookup runs first and unconditionally, before argument validation.
        default_dir = find_eval_protocol_dir()
        if store is not None and db_path is not None:
            raise ValueError("Provide only one of db_path or store, not both.")

        if store is None:
            self.db_path = os.path.join(default_dir, "logs.db") if db_path is None else db_path
            self._store = SqliteEvaluationRowStore(self.db_path)
            return

        self.db_path = store.db_path
        self._store = store

    def log(self, row: "EvaluationRow") -> None:
        """Upsert ``row`` into the store, then announce it on the event bus."""
        identifier = row.input_metadata.row_id
        payload = row.model_dump(exclude_none=True, mode="json")
        self._store.upsert_row(row_id=identifier, data=payload)
        try:
            event_bus.emit(LOG_EVENT_TYPE, EvaluationRow(**payload))
        except Exception as e:
            # Avoid breaking storage due to event emission issues
            logger.error(f"Failed to emit row_upserted event: {e}")

    def read(self, row_id: Optional[str] = None) -> List["EvaluationRow"]:
        """Return stored rows as ``EvaluationRow`` objects, optionally filtered by ``row_id``."""
        from eval_protocol.models import EvaluationRow

        rows = []
        for stored in self._store.read_rows(row_id=row_id):
            rows.append(EvaluationRow(**stored))
        return rows
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from peewee import CharField, Model, SqliteDatabase
|
|
5
|
+
from playhouse.sqlite_ext import JSONField
|
|
6
|
+
|
|
7
|
+
from eval_protocol.models import EvaluationRow
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SqliteEvaluationRowStore:
    """
    Lightweight reusable SQLite store for evaluation rows.

    Stores arbitrary row data as JSON keyed by a unique string `row_id`.
    """

    def __init__(self, db_path: str):
        """Open (creating if needed) the SQLite database at ``db_path``.

        Args:
            db_path: Path of the SQLite file. Its parent directory is created when
                the path has one; a bare filename is used relative to the current
                working directory.
        """
        # os.path.dirname returns "" for a bare filename and os.makedirs("") raises,
        # so only create the parent when there is one.
        parent_dir = os.path.dirname(db_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        self._db_path = db_path
        self._db = SqliteDatabase(self._db_path)

        # Models are declared per instance so each store is bound to its own database.
        class BaseModel(Model):
            class Meta:
                database = self._db

        # NOTE(review): peewee appears to derive the table name from the class name,
        # so renaming this class would likely point at a different table — confirm before renaming.
        class EvaluationRow(BaseModel):  # type: ignore
            row_id = CharField(unique=True)
            data = JSONField()

        self._EvaluationRow = EvaluationRow

        self._db.connect()
        self._db.create_tables([EvaluationRow])

    @property
    def db_path(self) -> str:
        """Path of the SQLite file backing this store."""
        return self._db_path

    def upsert_row(self, row_id: str, data: dict) -> None:
        """Insert ``data`` under ``row_id``, or replace the stored data if the ID exists.

        NOTE: this is a check followed by a write, not a single atomic statement.
        """
        if self._EvaluationRow.select().where(self._EvaluationRow.row_id == row_id).exists():
            self._EvaluationRow.update(data=data).where(self._EvaluationRow.row_id == row_id).execute()
        else:
            self._EvaluationRow.create(row_id=row_id, data=data)

    def read_rows(self, row_id: Optional[str] = None) -> List[dict]:
        """Return the stored ``data`` payloads, for one ``row_id`` or for all rows when it is ``None``."""
        if row_id is None:
            query = self._EvaluationRow.select().dicts()
        else:
            query = self._EvaluationRow.select().dicts().where(self._EvaluationRow.row_id == row_id)
        return [result["data"] for result in query]

    def delete_row(self, row_id: str) -> int:
        """Delete the row with ``row_id`` and return the result of the delete query."""
        return self._EvaluationRow.delete().where(self._EvaluationRow.row_id == row_id).execute()

    def delete_all_rows(self) -> int:
        """Delete every row and return the result of the delete query."""
        return self._EvaluationRow.delete().execute()
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
# Shared constants for directory discovery
|
|
5
|
+
EVAL_PROTOCOL_DIR = ".eval_protocol"
|
|
6
|
+
PYTHON_FILES = ["pyproject.toml", "requirements.txt"]
|
|
7
|
+
DATASETS_DIR = "datasets"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _find_ancestor_dir(start_dir, matches):
    """Return the first directory from *start_dir* upwards for which ``matches(dir)`` is true.

    The filesystem root itself is not inspected. Returns ``None`` when no
    directory matches. The walk stops when a directory is its own parent, so it
    terminates on any platform rather than only where the root is ``"/"``.
    """
    current_dir = start_dir
    while True:
        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            return None
        if matches(current_dir):
            return current_dir
        current_dir = parent_dir


def find_eval_protocol_dir() -> str:
    """
    Find the .eval_protocol directory by looking up the directory tree.

    The search starts at the directory containing this module and proceeds in
    three steps:

    1. the nearest ancestor that already contains a ``.eval_protocol`` entry;
    2. otherwise the nearest ancestor containing one of ``PYTHON_FILES``;
    3. otherwise the current working directory.

    The resulting ``.eval_protocol`` directory is created if it does not exist.

    Returns:
        Path to the .eval_protocol directory
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))

    base_dir = _find_ancestor_dir(
        module_dir,
        lambda d: os.path.exists(os.path.join(d, EVAL_PROTOCOL_DIR)),
    )
    if base_dir is None:
        # if not found, look up until a pyproject.toml or requirements.txt is found
        base_dir = _find_ancestor_dir(
            module_dir,
            lambda d: any(os.path.exists(os.path.join(d, f)) for f in PYTHON_FILES),
        )
    if base_dir is None:
        # fall back to the directory that this python process is running in
        base_dir = os.getcwd()

    log_dir = os.path.join(base_dir, EVAL_PROTOCOL_DIR)

    # create the .eval_protocol directory if it doesn't exist
    os.makedirs(log_dir, exist_ok=True)

    return log_dir
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def find_eval_protocol_datasets_dir() -> str:
    """
    Locate the ``datasets`` subdirectory of the .eval_protocol directory.

    The subdirectory is created when it is missing.

    Returns:
        Path to the .eval_protocol/datasets directory
    """
    datasets_path = os.path.join(find_eval_protocol_dir(), DATASETS_DIR)
    # make sure the datasets subdirectory is present before handing it out
    os.makedirs(datasets_path, exist_ok=True)
    return datasets_path
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from typing import Any, Callable, List
|
|
2
|
+
|
|
3
|
+
from eval_protocol.event_bus.logger import logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EventBus:
    """Core event bus interface for decoupling components in the evaluation system.

    Listeners are plain callables that ``emit`` invokes synchronously, in
    subscription order.
    """

    def __init__(self):
        self._listeners: List[Callable[[str, Any], None]] = []

    def subscribe(self, callback: Callable[[str, Any], None]) -> None:
        """Subscribe to events.

        Args:
            callback: Function that takes (event_type, data) parameters
        """
        self._listeners.append(callback)

    def unsubscribe(self, callback: Callable[[str, Any], None]) -> None:
        """Unsubscribe from events.

        Unsubscribing a callback that was never subscribed is a no-op.

        Args:
            callback: The callback function to remove
        """
        try:
            self._listeners.remove(callback)
        except ValueError:
            pass  # Callback wasn't subscribed

    def emit(self, event_type: str, data: Any) -> None:
        """Emit an event to all subscribers.

        A failing listener is logged at debug level and does not stop delivery
        to the remaining listeners.

        Args:
            event_type: Type of event (e.g., "row_upserted")
            data: Event data
        """
        # Iterate over a snapshot: a listener may subscribe/unsubscribe while the
        # event is being dispatched, and mutating the list mid-iteration would
        # cause other listeners to be skipped.
        for listener in list(self._listeners):
            try:
                listener(event_type, data)
            except Exception as e:
                logger.debug(f"Event listener failed for {event_type}: {e}")

    def start_listening(self) -> None:
        """Start listening for cross-process events. Override in subclasses."""
        pass

    def stop_listening(self) -> None:
        """Stop listening for cross-process events. Override in subclasses."""
        pass
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from eval_protocol.event_bus.event_bus import EventBus
|
|
7
|
+
from eval_protocol.event_bus.logger import logger
|
|
8
|
+
from eval_protocol.event_bus.sqlite_event_bus_database import SqliteEventBusDatabase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SqliteEventBus(EventBus):
    """SQLite-based event bus implementation that supports cross-process communication.

    ``emit`` delivers to local listeners and also records the event in a SQLite
    database. After ``start_listening`` a background thread polls that database
    for events recorded under a different bus ID and forwards them to the local
    listeners.
    """

    def __init__(self, db_path: Optional[str] = None):
        """Create the bus.

        Args:
            db_path: SQLite file used to exchange events; defaults to ``logs.db``
                inside the directory returned by ``find_eval_protocol_dir``.
        """
        super().__init__()

        # Use the same database as the evaluation row store
        if db_path is None:
            import os

            from eval_protocol.directory_utils import find_eval_protocol_dir

            eval_protocol_dir = find_eval_protocol_dir()
            db_path = os.path.join(eval_protocol_dir, "logs.db")

        self._db = SqliteEventBusDatabase(db_path)
        self._running = False
        self._listener_thread: Optional[threading.Thread] = None
        # Random per-instance ID, used to skip events this bus published itself.
        self._process_id = str(uuid4())

    def emit(self, event_type: str, data: Any) -> None:
        """Emit an event to all subscribers.

        Args:
            event_type: Type of event (e.g., "log")
            data: Event data
        """
        # Call local listeners immediately
        super().emit(event_type, data)

        # Publish to cross-process subscribers
        self._publish_cross_process(event_type, data)

    def _publish_cross_process(self, event_type: str, data: Any) -> None:
        """Publish event to cross-process subscribers via database."""
        self._db.publish_event(event_type, data, self._process_id)

    def start_listening(self) -> None:
        """Start listening for cross-process events. No-op if already running."""
        if self._running:
            return

        self._running = True
        self._start_database_listener()

    def stop_listening(self) -> None:
        """Stop listening for cross-process events.

        Waits up to one second for the polling thread to exit.
        """
        self._running = False
        if self._listener_thread and self._listener_thread.is_alive():
            self._listener_thread.join(timeout=1)

    def _start_database_listener(self) -> None:
        """Start database-based event listener."""

        def database_listener():
            last_cleanup = time.time()

            while self._running:
                try:
                    # Get unprocessed events from other processes
                    events = self._db.get_unprocessed_events(self._process_id)

                    for event in events:
                        if not self._running:
                            break

                        try:
                            # Handle the event
                            self._handle_cross_process_event(event["event_type"], event["data"])

                            # Mark as processed
                            self._db.mark_event_processed(event["event_id"])

                        except Exception as e:
                            logger.debug(f"Failed to process event {event['event_id']}: {e}")

                    # Clean up old events every hour
                    current_time = time.time()
                    if current_time - last_cleanup >= 3600:
                        self._db.cleanup_old_events()
                        last_cleanup = current_time

                    # Small sleep to prevent busy waiting
                    time.sleep(0.1)

                except Exception as e:
                    logger.debug(f"Database listener error: {e}")
                    # Back off longer after an unexpected failure.
                    time.sleep(1)

        # Daemon thread, so it does not keep the interpreter alive at exit.
        self._listener_thread = threading.Thread(target=database_listener, daemon=True)
        self._listener_thread.start()

    def _handle_cross_process_event(self, event_type: str, data: Any) -> None:
        """Handle events received from other processes.

        ``data`` is whatever was read back from the database for the event.
        """
        # This runs on the polling thread while other threads may call
        # subscribe/unsubscribe, so iterate over a snapshot of the listeners.
        for listener in list(self._listeners):
            try:
                listener(event_type, data)
            except Exception as e:
                logger.debug(f"Cross-process event listener failed for {event_type}: {e}")
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from typing import Any, List
|
|
3
|
+
from uuid import uuid4
|
|
4
|
+
|
|
5
|
+
from peewee import CharField, DateTimeField, Model, SqliteDatabase
|
|
6
|
+
from playhouse.sqlite_ext import JSONField
|
|
7
|
+
|
|
8
|
+
from eval_protocol.event_bus.logger import logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SqliteEventBusDatabase:
    """SQLite-backed table of events used to pass messages between processes."""

    def __init__(self, db_path: str):
        """Connect to the database at ``db_path`` and create the event table if needed."""
        self._db_path = db_path
        self._db = SqliteDatabase(db_path)
        database = self._db

        class BaseModel(Model):
            class Meta:
                pass

        # Bind the model hierarchy to this instance's database.
        BaseModel._meta.database = database

        class Event(BaseModel):  # type: ignore
            event_id = CharField(unique=True)
            event_type = CharField()
            data = JSONField()
            timestamp = DateTimeField()
            process_id = CharField()
            processed = CharField(default="false")  # Track if event has been processed

        self._Event = Event
        self._db.connect()
        self._db.create_tables([Event])

    def publish_event(self, event_type: str, data: Any, process_id: str) -> None:
        """Store one event; failures are logged as warnings, not raised."""
        try:
            # Pydantic models are dumped to JSON-compatible data; anything else is stored as given.
            payload = data.model_dump(mode="json", exclude_none=True) if hasattr(data, "model_dump") else data

            self._Event.create(
                event_id=str(uuid4()),
                event_type=event_type,
                data=payload,
                timestamp=time.time(),
                process_id=process_id,
                processed="false",
            )
        except Exception as e:
            logger.warning(f"Failed to publish event to database: {e}")

    def get_unprocessed_events(self, process_id: str) -> List[dict]:
        """Return unprocessed events published under a different ``process_id``, oldest first.

        Returns an empty list if the query fails.
        """
        try:
            from_others = self._Event.process_id != process_id
            pending = self._Event.processed == "false"
            rows = self._Event.select().where(from_others & pending).order_by(self._Event.timestamp)

            return [
                {
                    "event_id": record.event_id,
                    "event_type": record.event_type,
                    "data": record.data,
                    "timestamp": record.timestamp,
                    "process_id": record.process_id,
                }
                for record in rows
            ]
        except Exception as e:
            logger.warning(f"Failed to get unprocessed events: {e}")
            return []

    def mark_event_processed(self, event_id: str) -> None:
        """Flag the event with ``event_id`` as processed; failures are logged at debug level."""
        try:
            update_query = self._Event.update(processed="true").where(self._Event.event_id == event_id)
            update_query.execute()
        except Exception as e:
            logger.debug(f"Failed to mark event as processed: {e}")

    def cleanup_old_events(self, max_age_hours: int = 24) -> None:
        """Delete processed events older than ``max_age_hours``; failures are logged at debug level."""
        try:
            cutoff_time = time.time() - (max_age_hours * 3600)
            is_done = self._Event.processed == "true"
            is_stale = self._Event.timestamp < cutoff_time
            self._Event.delete().where(is_done & is_stale).execute()
        except Exception as e:
            logger.debug(f"Failed to cleanup old events: {e}")
|
|
@@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional
|
|
|
11
11
|
|
|
12
12
|
import aiohttp
|
|
13
13
|
from omegaconf import DictConfig
|
|
14
|
-
from pydantic import BaseModel
|
|
14
|
+
from pydantic import BaseModel # Added for new models
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
@@ -83,6 +83,9 @@ class FireworksModelClient(ModelClient):
|
|
|
83
83
|
}
|
|
84
84
|
if self.top_p is not None:
|
|
85
85
|
payload["top_p"] = self.top_p
|
|
86
|
+
# Include reasoning settings if configured (for reasoning-capable models)
|
|
87
|
+
if self.reasoning_effort:
|
|
88
|
+
payload["reasoning_effort"] = self.reasoning_effort
|
|
86
89
|
|
|
87
90
|
if tools:
|
|
88
91
|
payload["tools"] = tools
|