eval-protocol 0.2.64.dev2__tar.gz → 0.2.65__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {eval_protocol-0.2.64.dev2/eval_protocol.egg-info → eval_protocol-0.2.65}/PKG-INFO +1 -5
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/__init__.py +4 -21
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/_version.py +3 -3
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli.py +71 -0
- eval_protocol-0.2.65/eval_protocol/cli_commands/create_rft.py +254 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/upload.py +18 -0
- eval_protocol-0.2.65/eval_protocol/fireworks_rft.py +218 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/app.py +0 -2
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/litellm.py +0 -4
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +25 -27
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65/eval_protocol.egg-info}/PKG-INFO +1 -5
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/SOURCES.txt +2 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/requires.txt +0 -5
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/pyproject.toml +0 -6
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/LICENSE +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/README.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/normalize_sandbox_fusion.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/utils/generate_api_key.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/development/utils/subprocess_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/__main__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/base.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/bigquery.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/braintrust.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/fireworks_tracing.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/huggingface.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/langchain.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/langfuse.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/langsmith.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/openai_responses.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/trl.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/adapters/weave.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/models.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/orchestrator.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resource_abc.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resource_pool.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/math_api.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_envs/posting_api.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/bfcl_sim_api_resource.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/docker_resource.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/filesystem_resource.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/python_state_resource.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/resources/sql_resource.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/task_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/agent/tool_registry.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/auth.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/data/airline_dataset.jsonl +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/data/retail_dataset.jsonl +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_aime25.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_frozen_lake.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_gpqa.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_livebench_data_analysis.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_tau_bench_airline.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/benchmarks/test_tau_bench_retail.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/agent_eval_cmd.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/common.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/deploy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/deploy_mcp.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/logs.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/preview.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/cli_commands/run_eval_cmd.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/common_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/config.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/dynamic_data_loader.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/factory_data_loader.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/inline_data_loader.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/data_loader/models.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/dataset_logger.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/datasets/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/datasets/loader.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/directory_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/evaluation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/event_bus.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/logger.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/sqlite_event_bus.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/event_bus/sqlite_event_bus_database.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/execution/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/execution/pipeline.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/gcp_tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generation/cache.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generation/clients/base.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generation/clients.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/generic_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/get_pep440_version.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/human_id/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/human_id/dictionary.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/deepeval.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/openeval.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/integrations/trl.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/elasticsearch_client.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/elasticsearch_index_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/fireworks_tracing_http_handler.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/init.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/rollout_context.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/rollout_id_filter.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/log_utils/util.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/logging_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/adapter.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/client/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/client/connection.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/clients.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/base_policy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/execution/policy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/grid_renderer.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/mcp_multi_client.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/mcpgym.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/process_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/session/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/session/manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/simple_process_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp/simulation_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/config.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/main.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/base_client.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/local_docker_client.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_env.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/frozen_lake/server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/README.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tau2_mcp.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/models.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/packaging.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/platform_api.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/playback_policy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/auth.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/langfuse.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/main.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/models.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/proxy/proxy_core/redis_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_agent_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_dataset_adapter.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_langchain_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_no_op_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/default_single_turn_rollout_process.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/dual_mode_wrapper.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/elasticsearch_setup.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/evaluation_test.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/evaluation_test_postprocess.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/evaluation_test_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/exception_config.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/execution.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/generate_parameter_combinations.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/github_action_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/handle_persist_flow.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/parameterize.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/plugin.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/store_experiment_link.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/store_results_url.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/tracing_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/types.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/pytest/validate_signature.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/aha_judge/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/llm_judge.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/llm_judge_braintrust.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/quickstart/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/resources.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/reward_function.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/accuracy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/accuracy_length.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/apps_coding_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/apps_execution_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/apps_testing_util.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/bfcl_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/code_execution.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/code_execution_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/cpp_code.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/deepcoder_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/format.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/function_calling.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/json_schema.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/language_consistency.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/lean_prover.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/length.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/math.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/reasoning_steps.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/repetition.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rewards/tag_count.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/rl_processing.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/stats/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/stats/confidence_intervals.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/typed_interface.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/errors.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/remote_rollout_processor.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/types/types.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/batch_evaluation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/batch_transformation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/browser_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/check_server_status.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/dataset_helpers.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/evaluation_row_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/logs_models.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/logs_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/module_loader.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/packaging_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/show_results_url.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/static_policy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/subprocess_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol/utils/vite_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/dependency_links.txt +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/entry_points.txt +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/eval_protocol.egg-info/top_level.txt +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/setup.cfg +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/setup.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_accuracy.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_accuracy_length.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_adapters_e2e.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_agent_orchestrator.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_agent_resources.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_auth.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_batch_evaluation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cli.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cli_agent.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cli_args.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_code_execution.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_config.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_control_plane_separation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_cpp_code.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_data_driven_task_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_deepcoder_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_deepeval_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_deploy_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_directory_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_e2b_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_e2b_js_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_edge_cases.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_ep_upload_e2e.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_eval_protocol_import.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation_postprocess.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_evaluation_preview_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_event_bus.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_event_bus_helper.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_examples_end_to_end.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_fireworks_api.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_format.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_fractional_code.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_function_calling.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_gcp_tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_generic_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_human_id.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_json_schema.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_kwargs_validation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_language_consistency.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_lean_prover.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_lean_prover_runner.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_length.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_list_comparison_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_logs_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_logs_server_simple.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_math.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_minimal.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_models.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_models_rl.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_multiple_choice_math_reward.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_n_variant_batch_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_n_variant_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_openai_compatibility.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_openeval_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_packaging.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_parallel_rollouts.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_platform_api.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_quickstart_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_readiness.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_reasoning_steps.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_repetition.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_repetition_debug.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_retry_mechanism.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_reward_function.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_reward_protocol_import.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_rl_processing.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_rollout_control_plane_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_show_results_url.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_status_migration_changes.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_status_migration_integration.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_status_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_tag_count.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_tau_bench_airline_smoke.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_typed_interface.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_typed_interface_rl.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_upload_entrypoint.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_url_handling.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/tests/test_vite_server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/agent/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/agent/base.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/agent/llm_agent.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/api_config.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/data_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/api_service/simulation_service.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/cli.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/config.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/airline/policy.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/mock/policy.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/mock/policy_solo.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/retail/policy.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/main_policy.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/main_policy_solo.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/tech_support_manual.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/tech_support_workflow.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/user_simulator/simulation_guidelines.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/message.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/simulation.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/data_model/tasks.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/data_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/airline/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/data_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/mock/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/data_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/retail/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/data_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/const.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/create_tasks.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/mms_issues.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/service_issues.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tasks/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/user_data_model.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/user_tools.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/domains/telecom/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/db.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/environment.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/server.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/tool.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/toolkit.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/environment/utils/interface_agent.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_action.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_base.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_communicate.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_env.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/evaluator/evaluator_nl_assertions.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/metrics/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/metrics/agent_metrics.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/metrics/break_down_metrics.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/environment_manager.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/orchestrator.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/orchestrator/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/registry.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/run.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/check_data.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/show_domain_doc.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/start_servers.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/scripts/view_simulations.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/user/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/user/base.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/user/user_simulator.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/__init__.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/display.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/io_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/llm_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/pydantic_utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vendor/tau2/utils/utils.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/versioneer.py +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/index-BnDJont9.css +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/index-Cu9t0G5i.js +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/index-Cu9t0G5i.js.map +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- {eval_protocol-0.2.64.dev2 → eval_protocol-0.2.65}/vite-app/dist/index.html +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.65
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -107,10 +107,6 @@ Provides-Extra: langgraph-tools
|
|
|
107
107
|
Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
|
|
108
108
|
Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
|
|
109
109
|
Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
|
|
110
|
-
Provides-Extra: proxy
|
|
111
|
-
Requires-Dist: redis>=5.0.0; extra == "proxy"
|
|
112
|
-
Requires-Dist: langfuse>=2.0.0; extra == "proxy"
|
|
113
|
-
Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
|
|
114
110
|
Dynamic: license-file
|
|
115
111
|
|
|
116
112
|
# Eval Protocol (EP)
|
|
@@ -79,28 +79,11 @@ except ImportError:
|
|
|
79
79
|
WeaveAdapter = None
|
|
80
80
|
|
|
81
81
|
try:
|
|
82
|
-
from .proxy import create_app, AuthProvider, AccountInfo
|
|
82
|
+
from .proxy import create_app, AuthProvider, AccountInfo
|
|
83
83
|
except ImportError:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
"Proxy functionality requires additional dependencies. "
|
|
88
|
-
"Please install with: pip install eval-protocol[proxy]"
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
class AuthProvider:
|
|
92
|
-
def __init__(self, *args, **kwargs):
|
|
93
|
-
raise ImportError(
|
|
94
|
-
"Proxy functionality requires additional dependencies. "
|
|
95
|
-
"Please install with: pip install eval-protocol[proxy]"
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
class AccountInfo:
|
|
99
|
-
def __init__(self, *args, **kwargs):
|
|
100
|
-
raise ImportError(
|
|
101
|
-
"Proxy functionality requires additional dependencies. "
|
|
102
|
-
"Please install with: pip install eval-protocol[proxy]"
|
|
103
|
-
)
|
|
84
|
+
create_app = None
|
|
85
|
+
AuthProvider = None
|
|
86
|
+
AccountInfo = None
|
|
104
87
|
|
|
105
88
|
|
|
106
89
|
warnings.filterwarnings("default", category=DeprecationWarning, module="eval_protocol")
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-10-
|
|
11
|
+
"date": "2025-10-27T18:42:49-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.
|
|
14
|
+
"full-revisionid": "bc7fee952c3a4d4285245a83af0401e25eeb59d8",
|
|
15
|
+
"version": "0.2.65"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -356,6 +356,70 @@ def parse_args(args=None):
|
|
|
356
356
|
help="Non-interactive: upload all discovered evaluation tests",
|
|
357
357
|
)
|
|
358
358
|
|
|
359
|
+
# Create command group
|
|
360
|
+
create_parser = subparsers.add_parser(
|
|
361
|
+
"create",
|
|
362
|
+
help="Resource creation commands",
|
|
363
|
+
)
|
|
364
|
+
create_subparsers = create_parser.add_subparsers(dest="create_command")
|
|
365
|
+
rft_parser = create_subparsers.add_parser(
|
|
366
|
+
"rft",
|
|
367
|
+
help="Create a Reinforcement Fine-tuning Job on Fireworks",
|
|
368
|
+
)
|
|
369
|
+
rft_parser.add_argument(
|
|
370
|
+
"--evaluator-id",
|
|
371
|
+
help="Evaluator ID used during upload; if omitted, derive from local traces or a single discovered test",
|
|
372
|
+
)
|
|
373
|
+
# Dataset options
|
|
374
|
+
rft_parser.add_argument(
|
|
375
|
+
"--dataset-id",
|
|
376
|
+
help="Use existing Fireworks dataset id (skip local materialization)",
|
|
377
|
+
)
|
|
378
|
+
rft_parser.add_argument(
|
|
379
|
+
"--dataset-jsonl",
|
|
380
|
+
help="Path to JSONL to upload as a new Fireworks dataset",
|
|
381
|
+
)
|
|
382
|
+
rft_parser.add_argument(
|
|
383
|
+
"--dataset-builder",
|
|
384
|
+
help="Explicit dataset builder spec (module::function or path::function)",
|
|
385
|
+
)
|
|
386
|
+
rft_parser.add_argument(
|
|
387
|
+
"--dataset-display-name",
|
|
388
|
+
help="Display name for dataset on Fireworks (defaults to dataset id)",
|
|
389
|
+
)
|
|
390
|
+
# Training config and evaluator/job settings
|
|
391
|
+
rft_parser.add_argument("--base-model", help="Base model resource id")
|
|
392
|
+
rft_parser.add_argument("--warm-start-from", help="Addon model to warm start from")
|
|
393
|
+
rft_parser.add_argument("--output-model", help="Output model id (defaults from evaluator)")
|
|
394
|
+
rft_parser.add_argument("--epochs", type=int)
|
|
395
|
+
rft_parser.add_argument("--batch-size", type=int)
|
|
396
|
+
rft_parser.add_argument("--learning-rate", type=float)
|
|
397
|
+
rft_parser.add_argument("--max-context-length", type=int)
|
|
398
|
+
rft_parser.add_argument("--lora-rank", type=int)
|
|
399
|
+
rft_parser.add_argument("--accelerator-count", type=int)
|
|
400
|
+
rft_parser.add_argument("--region", help="Fireworks region enum value")
|
|
401
|
+
rft_parser.add_argument("--display-name", help="RFT job display name")
|
|
402
|
+
rft_parser.add_argument("--evaluation-dataset", help="Optional separate eval dataset id")
|
|
403
|
+
rft_parser.add_argument("--eval-auto-carveout", dest="eval_auto_carveout", action="store_true", default=True)
|
|
404
|
+
rft_parser.add_argument("--no-eval-auto-carveout", dest="eval_auto_carveout", action="store_false")
|
|
405
|
+
# Inference params
|
|
406
|
+
rft_parser.add_argument("--temperature", type=float)
|
|
407
|
+
rft_parser.add_argument("--top-p", type=float)
|
|
408
|
+
rft_parser.add_argument("--top-k", type=int)
|
|
409
|
+
rft_parser.add_argument("--max-tokens", type=int)
|
|
410
|
+
rft_parser.add_argument("--n", type=int)
|
|
411
|
+
rft_parser.add_argument("--inference-extra-body", help="JSON string for extra inference params")
|
|
412
|
+
# Wandb
|
|
413
|
+
rft_parser.add_argument("--wandb-enabled", action="store_true")
|
|
414
|
+
rft_parser.add_argument("--wandb-project")
|
|
415
|
+
rft_parser.add_argument("--wandb-entity")
|
|
416
|
+
rft_parser.add_argument("--wandb-run-id")
|
|
417
|
+
rft_parser.add_argument("--wandb-api-key")
|
|
418
|
+
# Misc
|
|
419
|
+
rft_parser.add_argument("--rft-job-id", help="Specify an explicit RFT job id")
|
|
420
|
+
rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
|
|
421
|
+
rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending")
|
|
422
|
+
|
|
359
423
|
# Run command (for Hydra-based evaluations)
|
|
360
424
|
# This subparser intentionally defines no arguments itself.
|
|
361
425
|
# All arguments after 'run' will be passed to Hydra by parse_known_args.
|
|
@@ -481,6 +545,13 @@ def main():
|
|
|
481
545
|
from .cli_commands.upload import upload_command
|
|
482
546
|
|
|
483
547
|
return upload_command(args)
|
|
548
|
+
elif args.command == "create":
|
|
549
|
+
if args.create_command == "rft":
|
|
550
|
+
from .cli_commands.create_rft import create_rft_command
|
|
551
|
+
|
|
552
|
+
return create_rft_command(args)
|
|
553
|
+
print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
|
|
554
|
+
return 1
|
|
484
555
|
elif args.command == "run":
|
|
485
556
|
# For the 'run' command, Hydra takes over argument parsing.
|
|
486
557
|
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
from ..auth import (
|
|
7
|
+
get_fireworks_account_id,
|
|
8
|
+
get_fireworks_api_base,
|
|
9
|
+
get_fireworks_api_key,
|
|
10
|
+
verify_api_key_and_get_account_id,
|
|
11
|
+
)
|
|
12
|
+
from ..fireworks_rft import (
|
|
13
|
+
_map_api_host_to_app_host,
|
|
14
|
+
build_default_dataset_id,
|
|
15
|
+
build_default_output_model,
|
|
16
|
+
create_dataset_from_jsonl,
|
|
17
|
+
create_reinforcement_fine_tuning_job,
|
|
18
|
+
detect_dataset_builder,
|
|
19
|
+
load_evaluator_trace,
|
|
20
|
+
materialize_dataset_via_builder,
|
|
21
|
+
)
|
|
22
|
+
from .upload import _discover_tests, _normalize_evaluator_id, _resolve_entry_to_qual_and_source
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _ensure_account_id() -> Optional[str]:
|
|
26
|
+
account_id = get_fireworks_account_id()
|
|
27
|
+
api_key = get_fireworks_api_key()
|
|
28
|
+
if not account_id and api_key:
|
|
29
|
+
resolved = verify_api_key_and_get_account_id(api_key=api_key, api_base=get_fireworks_api_base())
|
|
30
|
+
if resolved:
|
|
31
|
+
os.environ["FIREWORKS_ACCOUNT_ID"] = resolved
|
|
32
|
+
return resolved
|
|
33
|
+
return account_id
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _extract_terminal_segment(resource_name: str) -> str:
|
|
37
|
+
"""Return the last path segment if a fully-qualified resource name is provided."""
|
|
38
|
+
try:
|
|
39
|
+
return resource_name.strip("/").split("/")[-1]
|
|
40
|
+
except Exception:
|
|
41
|
+
return resource_name
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _print_links(evaluator_id: str, dataset_id: str, job_name: Optional[str]) -> None:
|
|
45
|
+
api_base = get_fireworks_api_base()
|
|
46
|
+
app_base = _map_api_host_to_app_host(api_base)
|
|
47
|
+
print("\n📊 Dashboard Links:")
|
|
48
|
+
evaluator_slug = _extract_terminal_segment(evaluator_id)
|
|
49
|
+
print(f" Evaluator: {app_base}/dashboard/evaluators/{evaluator_slug}")
|
|
50
|
+
if dataset_id:
|
|
51
|
+
print(f" Dataset: {app_base}/dashboard/datasets/{dataset_id}")
|
|
52
|
+
if job_name:
|
|
53
|
+
# job_name likely like accounts/{account}/reinforcementFineTuningJobs/{id}
|
|
54
|
+
try:
|
|
55
|
+
job_id = job_name.strip().split("/")[-1]
|
|
56
|
+
print(f" RFT Job: {app_base}/dashboard/fine-tuning/reinforcement/{job_id}")
|
|
57
|
+
except Exception:
|
|
58
|
+
pass
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _auto_select_evaluator_id(cwd: str) -> Optional[str]:
|
|
62
|
+
# Try local traces
|
|
63
|
+
traces_dir = os.path.join(cwd, ".eval_protocol", "evaluators")
|
|
64
|
+
if os.path.isdir(traces_dir):
|
|
65
|
+
candidates = [f[:-5] for f in os.listdir(traces_dir) if f.endswith(".json")]
|
|
66
|
+
if len(candidates) == 1:
|
|
67
|
+
return candidates[0]
|
|
68
|
+
# Fall back to discovering a single evaluation_test
|
|
69
|
+
tests = _discover_tests(cwd)
|
|
70
|
+
if len(tests) == 1:
|
|
71
|
+
qualname, source_file_path = tests[0].qualname, tests[0].file_path
|
|
72
|
+
test_func_name = qualname.split(".")[-1]
|
|
73
|
+
source_file_name = os.path.splitext(os.path.basename(source_file_path))[0]
|
|
74
|
+
evaluator_id = _normalize_evaluator_id(f"{source_file_name}-{test_func_name}")
|
|
75
|
+
return evaluator_id
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def create_rft_command(args) -> int:
|
|
80
|
+
evaluator_id: Optional[str] = getattr(args, "evaluator_id", None)
|
|
81
|
+
non_interactive: bool = bool(getattr(args, "yes", False))
|
|
82
|
+
dry_run: bool = bool(getattr(args, "dry_run", False))
|
|
83
|
+
|
|
84
|
+
api_key = get_fireworks_api_key()
|
|
85
|
+
if not api_key:
|
|
86
|
+
print("Error: FIREWORKS_API_KEY not set.")
|
|
87
|
+
return 1
|
|
88
|
+
|
|
89
|
+
account_id = _ensure_account_id()
|
|
90
|
+
if not account_id:
|
|
91
|
+
print("Error: FIREWORKS_ACCOUNT_ID not set and could not be resolved.")
|
|
92
|
+
return 1
|
|
93
|
+
|
|
94
|
+
api_base = get_fireworks_api_base()
|
|
95
|
+
|
|
96
|
+
# Resolve evaluator id if omitted
|
|
97
|
+
project_root = os.getcwd()
|
|
98
|
+
if not evaluator_id:
|
|
99
|
+
evaluator_id = _auto_select_evaluator_id(project_root)
|
|
100
|
+
if not evaluator_id:
|
|
101
|
+
print("Error: Could not infer evaluator id. Provide --evaluator-id or run 'eval-protocol upload' first.")
|
|
102
|
+
return 1
|
|
103
|
+
|
|
104
|
+
# Resolve evaluator resource name via local trace
|
|
105
|
+
# trace = load_evaluator_trace(project_root, evaluator_id)
|
|
106
|
+
# if not trace or not isinstance(trace, dict):
|
|
107
|
+
# print(
|
|
108
|
+
# "Error: Evaluator trace not found. Run 'eval-protocol upload' first or provide --dataset-id/--dataset-jsonl and --evaluator-id."
|
|
109
|
+
# )
|
|
110
|
+
# return 1
|
|
111
|
+
# evaluator_resource_name = trace.get("evaluator_resource_name") or trace.get("name") or evaluator_id
|
|
112
|
+
evaluator_resource_name = evaluator_id
|
|
113
|
+
|
|
114
|
+
# Determine dataset id and materialization path
|
|
115
|
+
dataset_id = getattr(args, "dataset_id", None)
|
|
116
|
+
dataset_jsonl = getattr(args, "dataset_jsonl", None)
|
|
117
|
+
dataset_display_name = getattr(args, "dataset_display_name", None)
|
|
118
|
+
dataset_builder = getattr(args, "dataset_builder", None)
|
|
119
|
+
|
|
120
|
+
if not dataset_id:
|
|
121
|
+
# Try builder from args, else from trace detection
|
|
122
|
+
# TODO: build dataset from traces directly
|
|
123
|
+
# builder_spec = dataset_builder or trace.get("dataset_builder")
|
|
124
|
+
# if not builder_spec:
|
|
125
|
+
# # Attempt detect from metric_dir
|
|
126
|
+
# metric_dir = trace.get("metric_dir")
|
|
127
|
+
# if metric_dir:
|
|
128
|
+
# builder_spec = detect_dataset_builder(metric_dir)
|
|
129
|
+
# if not builder_spec:
|
|
130
|
+
# print(
|
|
131
|
+
# "Error: Could not determine dataset. Provide --dataset-id, --dataset-jsonl, or --dataset-builder."
|
|
132
|
+
# )
|
|
133
|
+
# return 1
|
|
134
|
+
# try:
|
|
135
|
+
# dataset_jsonl, count = materialize_dataset_via_builder(builder_spec)
|
|
136
|
+
# print(f"✓ Materialized dataset via builder ({builder_spec}): {count} rows → {dataset_jsonl}")
|
|
137
|
+
# except Exception as e:
|
|
138
|
+
# print(f"Error: dataset builder failed: {e}")
|
|
139
|
+
# return 1
|
|
140
|
+
|
|
141
|
+
if not dataset_jsonl:
|
|
142
|
+
print("Error: Could not determine dataset. Provide --dataset-id or --dataset-jsonl.")
|
|
143
|
+
return 1
|
|
144
|
+
|
|
145
|
+
inferred_dataset_id = build_default_dataset_id(evaluator_id)
|
|
146
|
+
if dry_run:
|
|
147
|
+
print("--dry-run: would create dataset and upload JSONL")
|
|
148
|
+
dataset_id = inferred_dataset_id
|
|
149
|
+
else:
|
|
150
|
+
try:
|
|
151
|
+
dataset_id, _ = create_dataset_from_jsonl(
|
|
152
|
+
account_id=account_id,
|
|
153
|
+
api_key=api_key,
|
|
154
|
+
api_base=api_base,
|
|
155
|
+
dataset_id=inferred_dataset_id,
|
|
156
|
+
display_name=dataset_display_name or inferred_dataset_id,
|
|
157
|
+
jsonl_path=dataset_jsonl,
|
|
158
|
+
)
|
|
159
|
+
print(f"✓ Created and uploaded dataset: {dataset_id}")
|
|
160
|
+
except Exception as e:
|
|
161
|
+
print(f"Error creating/uploading dataset: {e}")
|
|
162
|
+
return 1
|
|
163
|
+
|
|
164
|
+
# Build training config/body
|
|
165
|
+
training_config: Dict[str, Any] = {}
|
|
166
|
+
if getattr(args, "base_model", None):
|
|
167
|
+
training_config["baseModel"] = args.base_model
|
|
168
|
+
if getattr(args, "warm_start_from", None):
|
|
169
|
+
training_config["warmStartFrom"] = args.warm_start_from
|
|
170
|
+
if "baseModel" not in training_config and "warmStartFrom" not in training_config:
|
|
171
|
+
# Provide a conservative default if neither is set
|
|
172
|
+
training_config["baseModel"] = "accounts/fireworks/models/llama-v3p1-8b-instruct"
|
|
173
|
+
|
|
174
|
+
# Optional hyperparameters
|
|
175
|
+
for key, arg_name in [
|
|
176
|
+
("epochs", "epochs"),
|
|
177
|
+
("batchSize", "batch_size"),
|
|
178
|
+
("learningRate", "learning_rate"),
|
|
179
|
+
("maxContextLength", "max_context_length"),
|
|
180
|
+
("loraRank", "lora_rank"),
|
|
181
|
+
("acceleratorCount", "accelerator_count"),
|
|
182
|
+
("region", "region"),
|
|
183
|
+
]:
|
|
184
|
+
val = getattr(args, arg_name, None)
|
|
185
|
+
if val is not None:
|
|
186
|
+
training_config[key] = val
|
|
187
|
+
|
|
188
|
+
inference_params: Dict[str, Any] = {}
|
|
189
|
+
for key, arg_name in [
|
|
190
|
+
("temperature", "temperature"),
|
|
191
|
+
("topP", "top_p"),
|
|
192
|
+
("topK", "top_k"),
|
|
193
|
+
("maxTokens", "max_tokens"),
|
|
194
|
+
("n", "n"),
|
|
195
|
+
]:
|
|
196
|
+
val = getattr(args, arg_name, None)
|
|
197
|
+
if val is not None:
|
|
198
|
+
inference_params[key] = val
|
|
199
|
+
if getattr(args, "inference_extra_body", None):
|
|
200
|
+
inference_params["extraBody"] = args.inference_extra_body
|
|
201
|
+
|
|
202
|
+
wandb_config: Optional[Dict[str, Any]] = None
|
|
203
|
+
if getattr(args, "wandb_enabled", False):
|
|
204
|
+
wandb_config = {
|
|
205
|
+
"enabled": True,
|
|
206
|
+
"apiKey": getattr(args, "wandb_api_key", None),
|
|
207
|
+
"project": getattr(args, "wandb_project", None),
|
|
208
|
+
"entity": getattr(args, "wandb_entity", None),
|
|
209
|
+
"runId": getattr(args, "wandb_run_id", None),
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
body: Dict[str, Any] = {
|
|
213
|
+
# "displayName": getattr(args, "display_name", None) or f"{evaluator_id}-rft",
|
|
214
|
+
"dataset": f"accounts/{account_id}/datasets/{dataset_id}",
|
|
215
|
+
"evaluator": evaluator_resource_name,
|
|
216
|
+
"evalAutoCarveout": bool(getattr(args, "eval_auto_carveout", True)),
|
|
217
|
+
"trainingConfig": training_config,
|
|
218
|
+
"inferenceParameters": inference_params or None,
|
|
219
|
+
"wandbConfig": wandb_config,
|
|
220
|
+
"outputStats": None,
|
|
221
|
+
"outputMetrics": None,
|
|
222
|
+
"mcpServer": None,
|
|
223
|
+
}
|
|
224
|
+
print("Show body:")
|
|
225
|
+
print(json.dumps(body, indent=2))
|
|
226
|
+
if getattr(args, "evaluation_dataset", None):
|
|
227
|
+
body["evaluationDataset"] = args.evaluation_dataset
|
|
228
|
+
if getattr(args, "output_model", None):
|
|
229
|
+
body.setdefault("trainingConfig", {})["outputModel"] = f"accounts/{account_id}/models/{args.output_model}"
|
|
230
|
+
else:
|
|
231
|
+
body.setdefault("trainingConfig", {})["outputModel"] = build_default_output_model(evaluator_id)
|
|
232
|
+
|
|
233
|
+
# Clean None fields to avoid noisy payloads
|
|
234
|
+
body = {k: v for k, v in body.items() if v is not None}
|
|
235
|
+
|
|
236
|
+
if dry_run:
|
|
237
|
+
print("--dry-run: would create RFT job with body:")
|
|
238
|
+
print(json.dumps(body, indent=2))
|
|
239
|
+
_print_links(evaluator_id, dataset_id, None)
|
|
240
|
+
return 0
|
|
241
|
+
|
|
242
|
+
try:
|
|
243
|
+
result = create_reinforcement_fine_tuning_job(
|
|
244
|
+
account_id=account_id, api_key=api_key, api_base=api_base, body=body
|
|
245
|
+
)
|
|
246
|
+
job_name = result.get("name") if isinstance(result, dict) else None
|
|
247
|
+
print("\n✅ Created Reinforcement Fine-tuning Job")
|
|
248
|
+
if job_name:
|
|
249
|
+
print(f" name: {job_name}")
|
|
250
|
+
_print_links(evaluator_id, dataset_id, job_name)
|
|
251
|
+
return 0
|
|
252
|
+
except Exception as e:
|
|
253
|
+
print(f"Error creating RFT job: {e}")
|
|
254
|
+
return 1
|
|
@@ -21,6 +21,7 @@ from eval_protocol.auth import (
|
|
|
21
21
|
from eval_protocol.platform_api import create_or_update_fireworks_secret
|
|
22
22
|
|
|
23
23
|
from eval_protocol.evaluation import create_evaluation
|
|
24
|
+
from eval_protocol.fireworks_rft import save_evaluator_trace, detect_dataset_builder
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
@dataclass
|
|
@@ -666,6 +667,23 @@ def upload_command(args: argparse.Namespace) -> int:
|
|
|
666
667
|
)
|
|
667
668
|
name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id
|
|
668
669
|
|
|
670
|
+
# Persist local evaluator trace for later `create rft`
|
|
671
|
+
try:
|
|
672
|
+
metric_dir = os.path.dirname(source_file_path) if source_file_path else root
|
|
673
|
+
builder_spec = detect_dataset_builder(metric_dir) or None
|
|
674
|
+
trace_payload = {
|
|
675
|
+
"evaluator_id": evaluator_id,
|
|
676
|
+
"evaluator_resource_name": name,
|
|
677
|
+
"entry_point": entry_point,
|
|
678
|
+
"metric_dir": metric_dir,
|
|
679
|
+
"project_root": root,
|
|
680
|
+
"dataset_builder": builder_spec,
|
|
681
|
+
}
|
|
682
|
+
save_evaluator_trace(project_root=root, evaluator_id=evaluator_id, trace=trace_payload)
|
|
683
|
+
except Exception:
|
|
684
|
+
# Non-fatal; continue
|
|
685
|
+
pass
|
|
686
|
+
|
|
669
687
|
# Print success message with Fireworks dashboard link
|
|
670
688
|
print(f"\n✅ Successfully uploaded evaluator: {evaluator_id}")
|
|
671
689
|
print("📊 View in Fireworks Dashboard:")
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import io
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Callable, Dict, Iterable, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
import requests
|
|
12
|
+
|
|
13
|
+
from .auth import get_fireworks_account_id, get_fireworks_api_base, get_fireworks_api_key
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _map_api_host_to_app_host(api_base: str) -> str:
|
|
17
|
+
try:
|
|
18
|
+
from urllib.parse import urlparse
|
|
19
|
+
|
|
20
|
+
parsed = urlparse(api_base)
|
|
21
|
+
host = parsed.netloc or parsed.path
|
|
22
|
+
if host.startswith("dev.api.fireworks.ai"):
|
|
23
|
+
return f"{parsed.scheme or 'https'}://dev.fireworks.ai"
|
|
24
|
+
if host.startswith("api."):
|
|
25
|
+
return f"{parsed.scheme or 'https'}://{host.replace('api.', 'app.', 1)}"
|
|
26
|
+
return f"{parsed.scheme or 'https'}://{host}"
|
|
27
|
+
except Exception:
|
|
28
|
+
return "https://app.fireworks.ai"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def load_evaluator_trace(project_root: str, evaluator_id: str) -> Optional[Dict[str, Any]]:
    """Load the locally persisted trace for an evaluator, if one exists.

    The trace is read from
    ``<project_root>/.eval_protocol/evaluators/<evaluator_id>.json``.

    Args:
        project_root: Directory that contains the ``.eval_protocol`` folder.
        evaluator_id: Evaluator identifier used as the file stem.

    Returns:
        The decoded trace dictionary, or ``None`` when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    trace_path = Path(project_root) / ".eval_protocol" / "evaluators" / f"{evaluator_id}.json"
    try:
        # Open directly instead of checking existence first: a missing file
        # surfaces as FileNotFoundError (an OSError), with no check/open race.
        with open(trace_path, "r", encoding="utf-8") as f:
            trace = json.load(f)
    except (OSError, ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return None
    # Callers are promised a mapping; ignore files holding other JSON values.
    return trace if isinstance(trace, dict) else None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def save_evaluator_trace(project_root: str, evaluator_id: str, trace: Dict[str, Any]) -> None:
    """Persist an evaluator trace as pretty-printed JSON under the project.

    The trace is written to
    ``<project_root>/.eval_protocol/evaluators/<evaluator_id>.json``; the
    directory is created when missing and an existing file is overwritten.

    Args:
        project_root: Directory that will contain the ``.eval_protocol`` folder.
        evaluator_id: Evaluator identifier used as the file stem.
        trace: JSON-serializable payload to store.

    Raises:
        TypeError / ValueError: If ``trace`` cannot be serialized to JSON.
        OSError: If the directory or file cannot be written.
    """
    base_dir = Path(project_root) / ".eval_protocol" / "evaluators"
    base_dir.mkdir(parents=True, exist_ok=True)
    trace_path = base_dir / f"{evaluator_id}.json"
    # Serialize before opening the file: opening with "w" truncates it, so a
    # serialization error after that point would destroy a previously saved
    # trace and leave a partial document behind.
    serialized = json.dumps(trace, indent=2, ensure_ascii=False)
    with open(trace_path, "w", encoding="utf-8") as f:
        f.write(serialized)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def detect_dataset_builder(metric_dir: str) -> Optional[str]:
    """Best-effort scan for a dataset builder callable inside ``metric_dir``.

    Every ``*.py`` file below ``metric_dir`` is executed as a module (so any
    import-time side effects of those files will run) and inspected for one
    of the well-known builder names. Files that fail to import are skipped.

    Args:
        metric_dir: Directory to walk recursively.

    Returns:
        A builder spec string in the form ``"path/to/module.py::function"``
        for the most preferred callable found, or ``None`` when nothing
        suitable exists or the scan itself fails.
    """
    # Well-known builder names; a lower rank is preferred.
    symbol_rank = {
        "build_training_dataset": 0,
        "get_training_dataset": 1,
        "get_dataset": 2,
        "dataset": 3,
        "DATASET_BUILDER": 4,
    }
    try:
        candidates: list = []
        scanned = 0
        for root, dirs, files in os.walk(metric_dir):
            # Sort in place so traversal (and therefore tie-breaking between
            # equally ranked candidates) does not depend on filesystem order.
            dirs.sort()
            for name in sorted(files):
                if not name.endswith(".py"):
                    continue
                file_path = os.path.join(root, name)
                scanned += 1
                # Use a private, unique module name: registering under the bare
                # file stem would replace unrelated entries in sys.modules
                # (e.g. a local "json.py" shadowing the stdlib module).
                module_name = f"_ep_dataset_builder_scan_{scanned}_{Path(file_path).stem}"
                spec = importlib.util.spec_from_file_location(module_name, file_path)
                if not spec or not spec.loader:
                    continue
                module = importlib.util.module_from_spec(spec)
                # The module must be registered while it executes (dataclasses
                # and similar look it up by name), but only for that long.
                sys.modules[spec.name] = module
                try:
                    spec.loader.exec_module(module)  # type: ignore[attr-defined]
                except (Exception, SystemExit):
                    # Scripts without a __main__ guard may call sys.exit() on
                    # import; that must not abort the scan.
                    continue
                finally:
                    sys.modules.pop(spec.name, None)
                for symbol in symbol_rank:
                    # Only callables are usable as builders downstream.
                    if callable(getattr(module, symbol, None)):
                        candidates.append((file_path, symbol))
        if not candidates:
            return None
        # min() keeps the first candidate among equal ranks.
        best_file, best_symbol = min(candidates, key=lambda item: symbol_rank[item[1]])
        return f"{best_file}::{best_symbol}"
    except Exception:
        # Detection is optional; never let it break the caller.
        return None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _import_builder(builder_spec: str) -> Callable[[], Iterable[Dict[str, Any]]]:
|
|
102
|
+
target, func = builder_spec.split("::", 1)
|
|
103
|
+
# If target looks like a path, load from file
|
|
104
|
+
if "/" in target or target.endswith(".py") or os.path.exists(target):
|
|
105
|
+
file_path = target if target.endswith(".py") else f"{target}.py"
|
|
106
|
+
if not os.path.isfile(file_path):
|
|
107
|
+
raise ValueError(f"Builder file not found: {file_path}")
|
|
108
|
+
module_name = Path(file_path).stem
|
|
109
|
+
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
|
110
|
+
if not spec or not spec.loader:
|
|
111
|
+
raise ValueError(f"Unable to load builder module: {file_path}")
|
|
112
|
+
module = importlib.util.module_from_spec(spec)
|
|
113
|
+
sys.modules[spec.name] = module
|
|
114
|
+
spec.loader.exec_module(module) # type: ignore[attr-defined]
|
|
115
|
+
else:
|
|
116
|
+
# Treat as module path
|
|
117
|
+
module = importlib.import_module(target)
|
|
118
|
+
if not hasattr(module, func):
|
|
119
|
+
raise ValueError(f"Function '{func}' not found in module '{getattr(module, '__name__', target)}'")
|
|
120
|
+
callable_obj = getattr(module, func)
|
|
121
|
+
if callable(callable_obj):
|
|
122
|
+
return callable_obj # type: ignore[return-value]
|
|
123
|
+
# If symbol is a constant like DATASET_BUILDER, expect it to be callable
|
|
124
|
+
if hasattr(callable_obj, "__call__"):
|
|
125
|
+
return callable_obj # type: ignore[return-value]
|
|
126
|
+
raise ValueError("Dataset builder is not callable")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def materialize_dataset_via_builder(builder_spec: str, output_path: Optional[str] = None) -> Tuple[str, int]:
    """Run a dataset builder and write its rows to a JSONL file.

    Args:
        builder_spec: Builder spec understood by ``_import_builder``
            (``"<target>::<function>"``).
        output_path: Destination file. When ``None`` a temporary
            ``ep_rft_dataset_*.jsonl`` file is created; the caller owns it
            once this function returns.

    Returns:
        A ``(path, row_count)`` tuple for the written file.

    Raises:
        Whatever the builder, JSON serialization or file I/O raises. A
        temporary file created by this function is removed in that case.
    """
    builder = _import_builder(builder_spec)
    rows_iter = builder()
    created_temp = output_path is None
    if created_temp:
        fd, output_path = tempfile.mkstemp(prefix="ep_rft_dataset_", suffix=".jsonl")
        # Only the path is needed; the file is reopened in text mode below.
        os.close(fd)
    count = 0
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            for row in rows_iter:
                f.write(json.dumps(row, ensure_ascii=False) + "\n")
                count += 1
    except BaseException:
        # mkstemp files are never cleaned up automatically; do not leak a
        # partial dataset that nobody holds a reference to.
        if created_temp:
            try:
                os.unlink(output_path)
            except OSError:
                pass
        raise
    return output_path, count
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def create_dataset_from_jsonl(
    account_id: str,
    api_key: str,
    api_base: str,
    dataset_id: str,
    display_name: Optional[str],
    jsonl_path: str,
) -> Tuple[str, Dict[str, Any]]:
    """Create a dataset resource and upload a JSONL file into it.

    Two requests are issued: a POST to
    ``/v1/accounts/{account_id}/datasets`` that creates the dataset, then a
    multipart POST to ``.../datasets/{dataset_id}:upload`` with the file.

    Args:
        account_id: Account that will own the dataset.
        api_key: Bearer token sent in the ``Authorization`` header.
        api_base: API base URL; a trailing ``/`` is ignored.
        dataset_id: Identifier for the new dataset.
        display_name: Human-readable name; defaults to ``dataset_id``.
        jsonl_path: Path of the JSONL file to upload.

    Returns:
        ``(dataset_id, creation_response_json)``.

    Raises:
        RuntimeError: If either request returns a status other than 200/201.
        OSError: If ``jsonl_path`` cannot be read.
    """
    base_url = api_base.rstrip("/")
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    # Count examples quickly. Blank lines (e.g. a trailing empty line) are not
    # examples and must not inflate the reported count.
    with open(jsonl_path, "r", encoding="utf-8") as f:
        example_count = sum(1 for line in f if line.strip())
    dataset_url = f"{base_url}/v1/accounts/{account_id}/datasets"
    payload = {
        "dataset": {
            "displayName": display_name or dataset_id,
            "evalProtocol": {},
            "format": "FORMAT_UNSPECIFIED",
            "exampleCount": str(example_count),
        },
        "datasetId": dataset_id,
    }
    resp = requests.post(dataset_url, json=payload, headers=headers, timeout=60)
    if resp.status_code not in (200, 201):
        raise RuntimeError(f"Dataset creation failed: {resp.status_code} {resp.text}")
    ds = resp.json()

    upload_url = f"{base_url}/v1/accounts/{account_id}/datasets/{dataset_id}:upload"
    # No explicit Content-Type here: requests sets the multipart boundary itself.
    up_headers = {"Authorization": f"Bearer {api_key}"}
    with open(jsonl_path, "rb") as f:
        files = {"file": f}
        up_resp = requests.post(upload_url, files=files, headers=up_headers, timeout=600)
    if up_resp.status_code not in (200, 201):
        raise RuntimeError(f"Dataset upload failed: {up_resp.status_code} {up_resp.text}")
    return dataset_id, ds
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def create_reinforcement_fine_tuning_job(
    account_id: str,
    api_key: str,
    api_base: str,
    body: Dict[str, Any],
) -> Dict[str, Any]:
    """Submit a reinforcement fine-tuning job and return the decoded response.

    POSTs ``body`` as JSON to
    ``/v1/accounts/{account_id}/reinforcementFineTuningJobs`` under
    ``api_base`` (trailing ``/`` ignored), authenticating with ``api_key`` as
    a bearer token.

    Raises:
        RuntimeError: If the response status is neither 200 nor 201.
    """
    endpoint = f"{api_base.rstrip('/')}/v1/accounts/{account_id}/reinforcementFineTuningJobs"
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    response = requests.post(endpoint, json=body, headers=request_headers, timeout=60)
    if response.status_code in (200, 201):
        return response.json()
    raise RuntimeError(f"RFT job creation failed: {response.status_code} {response.text}")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def build_default_dataset_id(evaluator_id: str) -> str:
    """Derive a default dataset id from an evaluator id.

    The evaluator id is lower-cased, underscores become hyphens, and
    ``-dataset-<YYYYmmddHHMMSS>`` (current local time) is appended.
    """
    slug = evaluator_id.lower().replace("_", "-")
    timestamp = time.strftime("%Y%m%d%H%M%S")
    return f"{slug}-dataset-{timestamp}"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def build_default_output_model(evaluator_id: str) -> str:
    """Derive a default output model name from an evaluator id.

    The evaluator id is lower-cased, underscores become hyphens, and the
    ``-rft`` suffix is appended.
    """
    slug = evaluator_id.lower().replace("_", "-")
    return f"{slug}-rft"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# Names exported by ``from ... import *``. The underscore-prefixed host
# mapper is listed explicitly, so it is exported as well.
__all__ = [
    "load_evaluator_trace",
    "save_evaluator_trace",
    "detect_dataset_builder",
    "materialize_dataset_via_builder",
    "create_dataset_from_jsonl",
    "create_reinforcement_fine_tuning_job",
    "build_default_dataset_id",
    "build_default_output_model",
    "_map_api_host_to_app_host",
]
|
|
@@ -208,7 +208,6 @@ def create_app(
|
|
|
208
208
|
encoded_base_url: Optional[str] = None,
|
|
209
209
|
config: ProxyConfig = Depends(get_config),
|
|
210
210
|
redis_client: redis.Redis = Depends(get_redis),
|
|
211
|
-
_: None = Depends(require_auth),
|
|
212
211
|
):
|
|
213
212
|
params = ChatParams(
|
|
214
213
|
project_id=project_id,
|
|
@@ -233,7 +232,6 @@ def create_app(
|
|
|
233
232
|
request: Request,
|
|
234
233
|
config: ProxyConfig = Depends(get_config),
|
|
235
234
|
redis_client: redis.Redis = Depends(get_redis),
|
|
236
|
-
_: None = Depends(require_auth),
|
|
237
235
|
):
|
|
238
236
|
params = ChatParams(project_id=project_id)
|
|
239
237
|
return await handle_chat_completion(
|