eval-protocol 0.2.88__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_protocol-0.2.88/LICENSE +21 -0
- eval_protocol-0.2.88/PKG-INFO +154 -0
- eval_protocol-0.2.88/README.md +39 -0
- eval_protocol-0.2.88/development/__init__.py +1 -0
- eval_protocol-0.2.88/development/normalize_sandbox_fusion.py +522 -0
- eval_protocol-0.2.88/development/utils/__init__.py +1 -0
- eval_protocol-0.2.88/development/utils/generate_api_key.py +31 -0
- eval_protocol-0.2.88/development/utils/subprocess_manager.py +435 -0
- eval_protocol-0.2.88/eval_protocol/__init__.py +178 -0
- eval_protocol-0.2.88/eval_protocol/__main__.py +10 -0
- eval_protocol-0.2.88/eval_protocol/_version.py +21 -0
- eval_protocol-0.2.88/eval_protocol/adapters/__init__.py +101 -0
- eval_protocol-0.2.88/eval_protocol/adapters/base.py +25 -0
- eval_protocol-0.2.88/eval_protocol/adapters/bigquery.py +304 -0
- eval_protocol-0.2.88/eval_protocol/adapters/braintrust.py +315 -0
- eval_protocol-0.2.88/eval_protocol/adapters/fireworks_tracing.py +453 -0
- eval_protocol-0.2.88/eval_protocol/adapters/huggingface.py +435 -0
- eval_protocol-0.2.88/eval_protocol/adapters/langchain.py +214 -0
- eval_protocol-0.2.88/eval_protocol/adapters/langfuse.py +552 -0
- eval_protocol-0.2.88/eval_protocol/adapters/langsmith.py +413 -0
- eval_protocol-0.2.88/eval_protocol/adapters/openai_responses.py +216 -0
- eval_protocol-0.2.88/eval_protocol/adapters/trl.py +8 -0
- eval_protocol-0.2.88/eval_protocol/adapters/utils.py +98 -0
- eval_protocol-0.2.88/eval_protocol/adapters/weave.py +130 -0
- eval_protocol-0.2.88/eval_protocol/agent/__init__.py +29 -0
- eval_protocol-0.2.88/eval_protocol/agent/models.py +69 -0
- eval_protocol-0.2.88/eval_protocol/agent/orchestrator.py +891 -0
- eval_protocol-0.2.88/eval_protocol/agent/resource_abc.py +89 -0
- eval_protocol-0.2.88/eval_protocol/agent/resource_pool.py +184 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/__init__.py +19 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/__init__.py +1 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/gorilla_file_system.py +342 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/math_api.py +40 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_envs/posting_api.py +157 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/bfcl_sim_api_resource.py +313 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/docker_resource.py +476 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/filesystem_resource.py +371 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/python_state_resource.py +170 -0
- eval_protocol-0.2.88/eval_protocol/agent/resources/sql_resource.py +271 -0
- eval_protocol-0.2.88/eval_protocol/agent/task_manager.py +1073 -0
- eval_protocol-0.2.88/eval_protocol/agent/tool_registry.py +111 -0
- eval_protocol-0.2.88/eval_protocol/auth.py +331 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/__init__.py +0 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/data/airline_dataset.jsonl +50 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/data/retail_dataset.jsonl +114 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/test_aime25.py +130 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/test_frozen_lake.py +76 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/test_gpqa.py +154 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/test_livebench_data_analysis.py +549 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/test_tau_bench_airline.py +304 -0
- eval_protocol-0.2.88/eval_protocol/benchmarks/test_tau_bench_retail.py +294 -0
- eval_protocol-0.2.88/eval_protocol/cli.py +694 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/__init__.py +1 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/agent_eval_cmd.py +260 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/common.py +242 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/create_rft.py +734 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/deploy.py +509 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/deploy_mcp.py +290 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/local_test.py +175 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/logs.py +57 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/preview.py +186 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/run_eval_cmd.py +203 -0
- eval_protocol-0.2.88/eval_protocol/cli_commands/upload.py +743 -0
- eval_protocol-0.2.88/eval_protocol/common_utils.py +72 -0
- eval_protocol-0.2.88/eval_protocol/config.py +180 -0
- eval_protocol-0.2.88/eval_protocol/data_loader/__init__.py +5 -0
- eval_protocol-0.2.88/eval_protocol/data_loader/dynamic_data_loader.py +38 -0
- eval_protocol-0.2.88/eval_protocol/data_loader/factory_data_loader.py +38 -0
- eval_protocol-0.2.88/eval_protocol/data_loader/inline_data_loader.py +68 -0
- eval_protocol-0.2.88/eval_protocol/data_loader/jsonl_data_loader.py +42 -0
- eval_protocol-0.2.88/eval_protocol/data_loader/models.py +128 -0
- eval_protocol-0.2.88/eval_protocol/dataset_logger/__init__.py +40 -0
- eval_protocol-0.2.88/eval_protocol/dataset_logger/dataset_logger.py +37 -0
- eval_protocol-0.2.88/eval_protocol/dataset_logger/local_fs_dataset_logger_adapter.py +98 -0
- eval_protocol-0.2.88/eval_protocol/dataset_logger/sqlite_dataset_logger_adapter.py +45 -0
- eval_protocol-0.2.88/eval_protocol/dataset_logger/sqlite_evaluation_row_store.py +63 -0
- eval_protocol-0.2.88/eval_protocol/datasets/__init__.py +1 -0
- eval_protocol-0.2.88/eval_protocol/datasets/loader.py +519 -0
- eval_protocol-0.2.88/eval_protocol/directory_utils.py +39 -0
- eval_protocol-0.2.88/eval_protocol/evaluation.py +1471 -0
- eval_protocol-0.2.88/eval_protocol/event_bus/__init__.py +38 -0
- eval_protocol-0.2.88/eval_protocol/event_bus/event_bus.py +50 -0
- eval_protocol-0.2.88/eval_protocol/event_bus/logger.py +3 -0
- eval_protocol-0.2.88/eval_protocol/event_bus/sqlite_event_bus.py +126 -0
- eval_protocol-0.2.88/eval_protocol/event_bus/sqlite_event_bus_database.py +93 -0
- eval_protocol-0.2.88/eval_protocol/exceptions.py +177 -0
- eval_protocol-0.2.88/eval_protocol/execution/__init__.py +1 -0
- eval_protocol-0.2.88/eval_protocol/execution/pipeline.py +954 -0
- eval_protocol-0.2.88/eval_protocol/fireworks_rft.py +230 -0
- eval_protocol-0.2.88/eval_protocol/gcp_tools.py +484 -0
- eval_protocol-0.2.88/eval_protocol/generation/cache.py +141 -0
- eval_protocol-0.2.88/eval_protocol/generation/clients/base.py +67 -0
- eval_protocol-0.2.88/eval_protocol/generation/clients.py +254 -0
- eval_protocol-0.2.88/eval_protocol/generic_server.py +165 -0
- eval_protocol-0.2.88/eval_protocol/get_pep440_version.py +141 -0
- eval_protocol-0.2.88/eval_protocol/human_id/__init__.py +77 -0
- eval_protocol-0.2.88/eval_protocol/human_id/dictionary.py +507 -0
- eval_protocol-0.2.88/eval_protocol/integrations/__init__.py +9 -0
- eval_protocol-0.2.88/eval_protocol/integrations/deepeval.py +115 -0
- eval_protocol-0.2.88/eval_protocol/integrations/openeval.py +40 -0
- eval_protocol-0.2.88/eval_protocol/integrations/trl.py +187 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/__init__.py +0 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/elasticsearch_client.py +338 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/elasticsearch_direct_http_handler.py +160 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/elasticsearch_index_manager.py +168 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/fireworks_tracing_http_handler.py +138 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/init.py +69 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/rollout_context.py +84 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/rollout_id_filter.py +28 -0
- eval_protocol-0.2.88/eval_protocol/log_utils/util.py +22 -0
- eval_protocol-0.2.88/eval_protocol/logging_utils.py +175 -0
- eval_protocol-0.2.88/eval_protocol/mcp/__init__.py +49 -0
- eval_protocol-0.2.88/eval_protocol/mcp/adapter.py +131 -0
- eval_protocol-0.2.88/eval_protocol/mcp/client/__init__.py +12 -0
- eval_protocol-0.2.88/eval_protocol/mcp/client/connection.py +565 -0
- eval_protocol-0.2.88/eval_protocol/mcp/clients.py +197 -0
- eval_protocol-0.2.88/eval_protocol/mcp/execution/__init__.py +23 -0
- eval_protocol-0.2.88/eval_protocol/mcp/execution/base_policy.py +236 -0
- eval_protocol-0.2.88/eval_protocol/mcp/execution/manager.py +618 -0
- eval_protocol-0.2.88/eval_protocol/mcp/execution/policy.py +327 -0
- eval_protocol-0.2.88/eval_protocol/mcp/grid_renderer.py +54 -0
- eval_protocol-0.2.88/eval_protocol/mcp/mcp_multi_client.py +211 -0
- eval_protocol-0.2.88/eval_protocol/mcp/mcpgym.py +664 -0
- eval_protocol-0.2.88/eval_protocol/mcp/process_manager.py +177 -0
- eval_protocol-0.2.88/eval_protocol/mcp/session/__init__.py +11 -0
- eval_protocol-0.2.88/eval_protocol/mcp/session/manager.py +229 -0
- eval_protocol-0.2.88/eval_protocol/mcp/simple_process_manager.py +291 -0
- eval_protocol-0.2.88/eval_protocol/mcp/simulation_server.py +480 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/__init__.py +1 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/config.py +147 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/main.py +18 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/__init__.py +1 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/base_client.py +132 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/local_docker_client.py +711 -0
- eval_protocol-0.2.88/eval_protocol/mcp_agent/orchestration/stdio_mcp_client_helper.py +3 -0
- eval_protocol-0.2.88/eval_protocol/mcp_env.py +393 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/__init__.py +0 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/frozen_lake/frozen_lake_adapter.py +160 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/frozen_lake/frozen_lake_mcp.py +102 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/frozen_lake/server.py +57 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/README.md +250 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/__init__.py +61 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/airplane_environment/airline_environment.py +107 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/mock_environment/mock_environment.py +100 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/retail_environment/retail_environment.py +112 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/server.py +83 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tau2_mcp.py +767 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/system_prompts/airline_agent_system_prompt.md +178 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/system_prompts/mock_agent_system_prompt.md +18 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/system_prompts/retail_agent_system_prompt.md +147 -0
- eval_protocol-0.2.88/eval_protocol/mcp_servers/tau2/tests/test_tau2_e2e.py +1689 -0
- eval_protocol-0.2.88/eval_protocol/models.py +1131 -0
- eval_protocol-0.2.88/eval_protocol/packaging.py +219 -0
- eval_protocol-0.2.88/eval_protocol/platform_api.py +379 -0
- eval_protocol-0.2.88/eval_protocol/playback_policy.py +374 -0
- eval_protocol-0.2.88/eval_protocol/proxy/__init__.py +18 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/__init__.py +13 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/app.py +305 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/auth.py +17 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/langfuse.py +528 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/litellm.py +173 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/main.py +10 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/models.py +98 -0
- eval_protocol-0.2.88/eval_protocol/proxy/proxy_core/redis_utils.py +57 -0
- eval_protocol-0.2.88/eval_protocol/pytest/__init__.py +52 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_agent_rollout_processor.py +279 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_dataset_adapter.py +9 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_langchain_rollout_processor.py +159 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +332 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_no_op_rollout_processor.py +27 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_pydantic_ai_rollout_processor.py +163 -0
- eval_protocol-0.2.88/eval_protocol/pytest/default_single_turn_rollout_process.py +166 -0
- eval_protocol-0.2.88/eval_protocol/pytest/dual_mode_wrapper.py +78 -0
- eval_protocol-0.2.88/eval_protocol/pytest/elasticsearch_setup.py +167 -0
- eval_protocol-0.2.88/eval_protocol/pytest/evaluation_test.py +708 -0
- eval_protocol-0.2.88/eval_protocol/pytest/evaluation_test_postprocess.py +208 -0
- eval_protocol-0.2.88/eval_protocol/pytest/evaluation_test_utils.py +594 -0
- eval_protocol-0.2.88/eval_protocol/pytest/exception_config.py +144 -0
- eval_protocol-0.2.88/eval_protocol/pytest/execution.py +111 -0
- eval_protocol-0.2.88/eval_protocol/pytest/generate_parameter_combinations.py +145 -0
- eval_protocol-0.2.88/eval_protocol/pytest/github_action_rollout_processor.py +225 -0
- eval_protocol-0.2.88/eval_protocol/pytest/handle_persist_flow.py +225 -0
- eval_protocol-0.2.88/eval_protocol/pytest/parameterize.py +424 -0
- eval_protocol-0.2.88/eval_protocol/pytest/plugin.py +413 -0
- eval_protocol-0.2.88/eval_protocol/pytest/remote_rollout_processor.py +207 -0
- eval_protocol-0.2.88/eval_protocol/pytest/rollout_processor.py +24 -0
- eval_protocol-0.2.88/eval_protocol/pytest/store_experiment_link.py +41 -0
- eval_protocol-0.2.88/eval_protocol/pytest/store_results_url.py +49 -0
- eval_protocol-0.2.88/eval_protocol/pytest/tracing_utils.py +177 -0
- eval_protocol-0.2.88/eval_protocol/pytest/types.py +77 -0
- eval_protocol-0.2.88/eval_protocol/pytest/validate_signature.py +71 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/__init__.py +8 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/__init__.py +4 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge.py +90 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_braintrust.py +63 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_langfuse.py +58 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_langsmith.py +82 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/llm_judge_openai_responses.py +66 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/aha_judge/utils.py +133 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/llm_judge.py +90 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/llm_judge_braintrust.py +63 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/svg_agent/evaluator/test_svgagent.py +223 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/svg_agent/evaluator/utils.py +523 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +202 -0
- eval_protocol-0.2.88/eval_protocol/quickstart/utils.py +251 -0
- eval_protocol-0.2.88/eval_protocol/resources.py +128 -0
- eval_protocol-0.2.88/eval_protocol/reward_function.py +410 -0
- eval_protocol-0.2.88/eval_protocol/rewards/__init__.py +90 -0
- eval_protocol-0.2.88/eval_protocol/rewards/accuracy.py +469 -0
- eval_protocol-0.2.88/eval_protocol/rewards/accuracy_length.py +186 -0
- eval_protocol-0.2.88/eval_protocol/rewards/apps_coding_reward.py +331 -0
- eval_protocol-0.2.88/eval_protocol/rewards/apps_execution_utils.py +149 -0
- eval_protocol-0.2.88/eval_protocol/rewards/apps_testing_util.py +564 -0
- eval_protocol-0.2.88/eval_protocol/rewards/bfcl_reward.py +314 -0
- eval_protocol-0.2.88/eval_protocol/rewards/code_execution.py +1634 -0
- eval_protocol-0.2.88/eval_protocol/rewards/code_execution_utils.py +72 -0
- eval_protocol-0.2.88/eval_protocol/rewards/cpp_code.py +861 -0
- eval_protocol-0.2.88/eval_protocol/rewards/deepcoder_reward.py +166 -0
- eval_protocol-0.2.88/eval_protocol/rewards/format.py +132 -0
- eval_protocol-0.2.88/eval_protocol/rewards/function_calling.py +543 -0
- eval_protocol-0.2.88/eval_protocol/rewards/json_schema.py +444 -0
- eval_protocol-0.2.88/eval_protocol/rewards/language_consistency.py +705 -0
- eval_protocol-0.2.88/eval_protocol/rewards/lean_prover.py +482 -0
- eval_protocol-0.2.88/eval_protocol/rewards/length.py +377 -0
- eval_protocol-0.2.88/eval_protocol/rewards/list_comparison_math_reward.py +226 -0
- eval_protocol-0.2.88/eval_protocol/rewards/math.py +772 -0
- eval_protocol-0.2.88/eval_protocol/rewards/multiple_choice_math_reward.py +242 -0
- eval_protocol-0.2.88/eval_protocol/rewards/reasoning_steps.py +249 -0
- eval_protocol-0.2.88/eval_protocol/rewards/repetition.py +356 -0
- eval_protocol-0.2.88/eval_protocol/rewards/tag_count.py +175 -0
- eval_protocol-0.2.88/eval_protocol/rl_processing.py +82 -0
- eval_protocol-0.2.88/eval_protocol/server.py +271 -0
- eval_protocol-0.2.88/eval_protocol/stats/__init__.py +3 -0
- eval_protocol-0.2.88/eval_protocol/stats/confidence_intervals.py +114 -0
- eval_protocol-0.2.88/eval_protocol/typed_interface.py +306 -0
- eval_protocol-0.2.88/eval_protocol/types/__init__.py +4 -0
- eval_protocol-0.2.88/eval_protocol/types/errors.py +11 -0
- eval_protocol-0.2.88/eval_protocol/types/remote_rollout_processor.py +87 -0
- eval_protocol-0.2.88/eval_protocol/types/types.py +107 -0
- eval_protocol-0.2.88/eval_protocol/utils/__init__.py +13 -0
- eval_protocol-0.2.88/eval_protocol/utils/batch_evaluation.py +217 -0
- eval_protocol-0.2.88/eval_protocol/utils/batch_transformation.py +205 -0
- eval_protocol-0.2.88/eval_protocol/utils/browser_utils.py +114 -0
- eval_protocol-0.2.88/eval_protocol/utils/check_server_status.py +77 -0
- eval_protocol-0.2.88/eval_protocol/utils/dataset_helpers.py +112 -0
- eval_protocol-0.2.88/eval_protocol/utils/evaluation_row_utils.py +158 -0
- eval_protocol-0.2.88/eval_protocol/utils/logs_models.py +45 -0
- eval_protocol-0.2.88/eval_protocol/utils/logs_server.py +720 -0
- eval_protocol-0.2.88/eval_protocol/utils/module_loader.py +56 -0
- eval_protocol-0.2.88/eval_protocol/utils/packaging_utils.py +108 -0
- eval_protocol-0.2.88/eval_protocol/utils/show_results_url.py +74 -0
- eval_protocol-0.2.88/eval_protocol/utils/static_policy.py +309 -0
- eval_protocol-0.2.88/eval_protocol/utils/subprocess_utils.py +118 -0
- eval_protocol-0.2.88/eval_protocol/utils/vite_server.py +143 -0
- eval_protocol-0.2.88/eval_protocol.egg-info/PKG-INFO +154 -0
- eval_protocol-0.2.88/eval_protocol.egg-info/SOURCES.txt +448 -0
- eval_protocol-0.2.88/eval_protocol.egg-info/dependency_links.txt +1 -0
- eval_protocol-0.2.88/eval_protocol.egg-info/entry_points.txt +7 -0
- eval_protocol-0.2.88/eval_protocol.egg-info/requires.txt +119 -0
- eval_protocol-0.2.88/eval_protocol.egg-info/top_level.txt +3 -0
- eval_protocol-0.2.88/pyproject.toml +225 -0
- eval_protocol-0.2.88/setup.cfg +16 -0
- eval_protocol-0.2.88/setup.py +8 -0
- eval_protocol-0.2.88/tests/test_accuracy.py +344 -0
- eval_protocol-0.2.88/tests/test_accuracy_length.py +286 -0
- eval_protocol-0.2.88/tests/test_adapters_e2e.py +765 -0
- eval_protocol-0.2.88/tests/test_agent_orchestrator.py +507 -0
- eval_protocol-0.2.88/tests/test_agent_resources.py +426 -0
- eval_protocol-0.2.88/tests/test_auth.py +396 -0
- eval_protocol-0.2.88/tests/test_batch_evaluation.py +1202 -0
- eval_protocol-0.2.88/tests/test_cli.py +170 -0
- eval_protocol-0.2.88/tests/test_cli_agent.py +217 -0
- eval_protocol-0.2.88/tests/test_cli_args.py +156 -0
- eval_protocol-0.2.88/tests/test_cli_create_rft_infer.py +1038 -0
- eval_protocol-0.2.88/tests/test_cli_local_test.py +256 -0
- eval_protocol-0.2.88/tests/test_code_execution.py +572 -0
- eval_protocol-0.2.88/tests/test_config.py +219 -0
- eval_protocol-0.2.88/tests/test_control_plane_separation.py +284 -0
- eval_protocol-0.2.88/tests/test_cpp_code.py +833 -0
- eval_protocol-0.2.88/tests/test_data_driven_task_manager.py +483 -0
- eval_protocol-0.2.88/tests/test_deepcoder_reward.py +334 -0
- eval_protocol-0.2.88/tests/test_deepeval_integration.py +377 -0
- eval_protocol-0.2.88/tests/test_deploy_integration.py +214 -0
- eval_protocol-0.2.88/tests/test_directory_utils.py +95 -0
- eval_protocol-0.2.88/tests/test_e2b_integration.py +74 -0
- eval_protocol-0.2.88/tests/test_e2b_js_integration.py +80 -0
- eval_protocol-0.2.88/tests/test_edge_cases.py +160 -0
- eval_protocol-0.2.88/tests/test_ep_upload_e2e.py +646 -0
- eval_protocol-0.2.88/tests/test_eval_protocol_import.py +275 -0
- eval_protocol-0.2.88/tests/test_evaluation.py +431 -0
- eval_protocol-0.2.88/tests/test_evaluation_integration.py +365 -0
- eval_protocol-0.2.88/tests/test_evaluation_postprocess.py +467 -0
- eval_protocol-0.2.88/tests/test_evaluation_preview_integration.py +470 -0
- eval_protocol-0.2.88/tests/test_event_bus.py +301 -0
- eval_protocol-0.2.88/tests/test_event_bus_helper.py +73 -0
- eval_protocol-0.2.88/tests/test_examples_end_to_end.py +962 -0
- eval_protocol-0.2.88/tests/test_exceptions.py +371 -0
- eval_protocol-0.2.88/tests/test_fireworks_api.py +68 -0
- eval_protocol-0.2.88/tests/test_format.py +227 -0
- eval_protocol-0.2.88/tests/test_fractional_code.py +312 -0
- eval_protocol-0.2.88/tests/test_function_calling.py +1152 -0
- eval_protocol-0.2.88/tests/test_gcp_tools.py +578 -0
- eval_protocol-0.2.88/tests/test_generic_server.py +207 -0
- eval_protocol-0.2.88/tests/test_human_id.py +94 -0
- eval_protocol-0.2.88/tests/test_integration.py +159 -0
- eval_protocol-0.2.88/tests/test_json_schema.py +425 -0
- eval_protocol-0.2.88/tests/test_kwargs_validation.py +178 -0
- eval_protocol-0.2.88/tests/test_language_consistency.py +232 -0
- eval_protocol-0.2.88/tests/test_lean_prover.py +165 -0
- eval_protocol-0.2.88/tests/test_lean_prover_runner.py +127 -0
- eval_protocol-0.2.88/tests/test_length.py +379 -0
- eval_protocol-0.2.88/tests/test_list_comparison_math_reward.py +207 -0
- eval_protocol-0.2.88/tests/test_logs_server.py +596 -0
- eval_protocol-0.2.88/tests/test_logs_server_simple.py +88 -0
- eval_protocol-0.2.88/tests/test_math.py +540 -0
- eval_protocol-0.2.88/tests/test_message_field_filtering.py +64 -0
- eval_protocol-0.2.88/tests/test_minimal.py +113 -0
- eval_protocol-0.2.88/tests/test_models.py +723 -0
- eval_protocol-0.2.88/tests/test_models_rl.py +158 -0
- eval_protocol-0.2.88/tests/test_multiple_choice_math_reward.py +230 -0
- eval_protocol-0.2.88/tests/test_n_variant_batch_integration.py +407 -0
- eval_protocol-0.2.88/tests/test_n_variant_integration.py +205 -0
- eval_protocol-0.2.88/tests/test_openai_compatibility.py +82 -0
- eval_protocol-0.2.88/tests/test_openeval_integration.py +44 -0
- eval_protocol-0.2.88/tests/test_packaging.py +119 -0
- eval_protocol-0.2.88/tests/test_parallel_rollouts.py +379 -0
- eval_protocol-0.2.88/tests/test_platform_api.py +166 -0
- eval_protocol-0.2.88/tests/test_quickstart_utils.py +388 -0
- eval_protocol-0.2.88/tests/test_readiness.py +332 -0
- eval_protocol-0.2.88/tests/test_reasoning_steps.py +359 -0
- eval_protocol-0.2.88/tests/test_repetition.py +285 -0
- eval_protocol-0.2.88/tests/test_repetition_debug.py +21 -0
- eval_protocol-0.2.88/tests/test_retry_mechanism.py +399 -0
- eval_protocol-0.2.88/tests/test_reward_function.py +236 -0
- eval_protocol-0.2.88/tests/test_reward_protocol_import.py +274 -0
- eval_protocol-0.2.88/tests/test_rl_processing.py +167 -0
- eval_protocol-0.2.88/tests/test_rollout_control_plane_integration.py +630 -0
- eval_protocol-0.2.88/tests/test_server.py +99 -0
- eval_protocol-0.2.88/tests/test_show_results_url.py +336 -0
- eval_protocol-0.2.88/tests/test_status_migration_changes.py +440 -0
- eval_protocol-0.2.88/tests/test_status_migration_integration.py +388 -0
- eval_protocol-0.2.88/tests/test_status_model.py +360 -0
- eval_protocol-0.2.88/tests/test_tag_count.py +274 -0
- eval_protocol-0.2.88/tests/test_tau_bench_airline_smoke.py +241 -0
- eval_protocol-0.2.88/tests/test_typed_interface.py +262 -0
- eval_protocol-0.2.88/tests/test_typed_interface_rl.py +211 -0
- eval_protocol-0.2.88/tests/test_upload_entrypoint.py +227 -0
- eval_protocol-0.2.88/tests/test_url_handling.py +68 -0
- eval_protocol-0.2.88/tests/test_vite_server.py +224 -0
- eval_protocol-0.2.88/vendor/tau2/__init__.py +21 -0
- eval_protocol-0.2.88/vendor/tau2/agent/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/agent/base.py +91 -0
- eval_protocol-0.2.88/vendor/tau2/agent/llm_agent.py +462 -0
- eval_protocol-0.2.88/vendor/tau2/api_service/__init__.py +1 -0
- eval_protocol-0.2.88/vendor/tau2/api_service/api_config.py +30 -0
- eval_protocol-0.2.88/vendor/tau2/api_service/data_model.py +19 -0
- eval_protocol-0.2.88/vendor/tau2/api_service/simulation_service.py +56 -0
- eval_protocol-0.2.88/vendor/tau2/cli.py +236 -0
- eval_protocol-0.2.88/vendor/tau2/config.py +45 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/airline/policy.md +167 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/mock/policy.md +7 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/mock/policy_solo.md +6 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/retail/policy.md +136 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/main_policy.md +159 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/main_policy_solo.md +155 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/tech_support_manual.md +206 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/tech_support_workflow.md +303 -0
- eval_protocol-0.2.88/vendor/tau2/data/domains/telecom/tech_support_workflow_solo.md +299 -0
- eval_protocol-0.2.88/vendor/tau2/data/user_simulator/simulation_guidelines.md +18 -0
- eval_protocol-0.2.88/vendor/tau2/data/user_simulator/simulation_guidelines_tools.md +30 -0
- eval_protocol-0.2.88/vendor/tau2/data_model/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/data_model/message.py +203 -0
- eval_protocol-0.2.88/vendor/tau2/data_model/simulation.py +408 -0
- eval_protocol-0.2.88/vendor/tau2/data_model/tasks.py +443 -0
- eval_protocol-0.2.88/vendor/tau2/domains/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/domains/airline/__init__.py +1 -0
- eval_protocol-0.2.88/vendor/tau2/domains/airline/data_model.py +240 -0
- eval_protocol-0.2.88/vendor/tau2/domains/airline/environment.py +37 -0
- eval_protocol-0.2.88/vendor/tau2/domains/airline/tools.py +701 -0
- eval_protocol-0.2.88/vendor/tau2/domains/airline/utils.py +6 -0
- eval_protocol-0.2.88/vendor/tau2/domains/mock/__init__.py +1 -0
- eval_protocol-0.2.88/vendor/tau2/domains/mock/data_model.py +32 -0
- eval_protocol-0.2.88/vendor/tau2/domains/mock/environment.py +39 -0
- eval_protocol-0.2.88/vendor/tau2/domains/mock/tools.py +121 -0
- eval_protocol-0.2.88/vendor/tau2/domains/mock/utils.py +7 -0
- eval_protocol-0.2.88/vendor/tau2/domains/retail/__init__.py +1 -0
- eval_protocol-0.2.88/vendor/tau2/domains/retail/data_model.py +195 -0
- eval_protocol-0.2.88/vendor/tau2/domains/retail/environment.py +37 -0
- eval_protocol-0.2.88/vendor/tau2/domains/retail/tools.py +701 -0
- eval_protocol-0.2.88/vendor/tau2/domains/retail/utils.py +6 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/__init__.py +1 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/data_model.py +206 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/environment.py +172 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/const.py +26 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/create_tasks.py +92 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/manager.py +209 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/mms_issues.py +325 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/mobile_data_issues.py +541 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/service_issues.py +452 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tasks/utils.py +124 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/tools.py +752 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/user_data_model.py +392 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/user_tools.py +1109 -0
- eval_protocol-0.2.88/vendor/tau2/domains/telecom/utils.py +26 -0
- eval_protocol-0.2.88/vendor/tau2/environment/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/environment/db.py +41 -0
- eval_protocol-0.2.88/vendor/tau2/environment/environment.py +391 -0
- eval_protocol-0.2.88/vendor/tau2/environment/server.py +223 -0
- eval_protocol-0.2.88/vendor/tau2/environment/tool.py +216 -0
- eval_protocol-0.2.88/vendor/tau2/environment/toolkit.py +206 -0
- eval_protocol-0.2.88/vendor/tau2/environment/utils/interface_agent.py +255 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator.py +129 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_action.py +86 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_base.py +26 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_communicate.py +83 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_env.py +140 -0
- eval_protocol-0.2.88/vendor/tau2/evaluator/evaluator_nl_assertions.py +145 -0
- eval_protocol-0.2.88/vendor/tau2/metrics/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/metrics/agent_metrics.py +139 -0
- eval_protocol-0.2.88/vendor/tau2/metrics/break_down_metrics.py +124 -0
- eval_protocol-0.2.88/vendor/tau2/orchestrator/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/orchestrator/environment_manager.py +259 -0
- eval_protocol-0.2.88/vendor/tau2/orchestrator/orchestrator.py +390 -0
- eval_protocol-0.2.88/vendor/tau2/orchestrator/utils.py +8 -0
- eval_protocol-0.2.88/vendor/tau2/registry.py +192 -0
- eval_protocol-0.2.88/vendor/tau2/run.py +508 -0
- eval_protocol-0.2.88/vendor/tau2/scripts/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/scripts/check_data.py +32 -0
- eval_protocol-0.2.88/vendor/tau2/scripts/show_domain_doc.py +77 -0
- eval_protocol-0.2.88/vendor/tau2/scripts/start_servers.py +97 -0
- eval_protocol-0.2.88/vendor/tau2/scripts/view_simulations.py +268 -0
- eval_protocol-0.2.88/vendor/tau2/user/__init__.py +0 -0
- eval_protocol-0.2.88/vendor/tau2/user/base.py +144 -0
- eval_protocol-0.2.88/vendor/tau2/user/user_simulator.py +200 -0
- eval_protocol-0.2.88/vendor/tau2/utils/__init__.py +3 -0
- eval_protocol-0.2.88/vendor/tau2/utils/display.py +490 -0
- eval_protocol-0.2.88/vendor/tau2/utils/io_utils.py +75 -0
- eval_protocol-0.2.88/vendor/tau2/utils/llm_utils.py +305 -0
- eval_protocol-0.2.88/vendor/tau2/utils/pydantic_utils.py +32 -0
- eval_protocol-0.2.88/vendor/tau2/utils/utils.py +77 -0
- eval_protocol-0.2.88/versioneer.py +2305 -0
- eval_protocol-0.2.88/vite-app/dist/assets/favicon-BkAAWQga.png +0 -0
- eval_protocol-0.2.88/vite-app/dist/assets/index-BIhepl19.css +1 -0
- eval_protocol-0.2.88/vite-app/dist/assets/index-DaovgarD.js +137 -0
- eval_protocol-0.2.88/vite-app/dist/assets/index-DaovgarD.js.map +1 -0
- eval_protocol-0.2.88/vite-app/dist/assets/logo-light-BprIBJQW.png +0 -0
- eval_protocol-0.2.88/vite-app/dist/index.html +14 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Fireworks AI, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: eval-protocol
|
|
3
|
+
Version: 0.2.88
|
|
4
|
+
Summary: The official Python SDK for Eval Protocol (EP). EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
|
+
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/fireworks-ai/eval-protocol
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: requests>=2.25.0
|
|
14
|
+
Requires-Dist: pydantic>=2.0.0
|
|
15
|
+
Requires-Dist: dataclasses-json>=0.5.7
|
|
16
|
+
Requires-Dist: uvicorn>=0.15.0
|
|
17
|
+
Requires-Dist: python-dotenv>=0.19.0
|
|
18
|
+
Requires-Dist: openai>=1.78.1
|
|
19
|
+
Requires-Dist: aiosqlite
|
|
20
|
+
Requires-Dist: aiohttp
|
|
21
|
+
Requires-Dist: mcp>=1.9.2
|
|
22
|
+
Requires-Dist: PyYAML>=5.0
|
|
23
|
+
Requires-Dist: hydra-core>=1.3.2
|
|
24
|
+
Requires-Dist: omegaconf>=2.3.0
|
|
25
|
+
Requires-Dist: httpx>=0.24.0
|
|
26
|
+
Requires-Dist: anthropic>=0.59.0
|
|
27
|
+
Requires-Dist: litellm<1.75.0
|
|
28
|
+
Requires-Dist: pytest>=6.0.0
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.21.0
|
|
30
|
+
Requires-Dist: peewee>=3.18.2
|
|
31
|
+
Requires-Dist: backoff>=2.2.0
|
|
32
|
+
Requires-Dist: questionary>=2.0.0
|
|
33
|
+
Requires-Dist: toml>=0.10.0
|
|
34
|
+
Requires-Dist: loguru>=0.6.0
|
|
35
|
+
Requires-Dist: docstring-parser>=0.15
|
|
36
|
+
Requires-Dist: rich>=12.0.0
|
|
37
|
+
Requires-Dist: psutil>=5.8.0
|
|
38
|
+
Requires-Dist: addict>=2.4.0
|
|
39
|
+
Requires-Dist: deepdiff>=6.0.0
|
|
40
|
+
Requires-Dist: websockets>=15.0.1
|
|
41
|
+
Requires-Dist: fastapi>=0.116.1
|
|
42
|
+
Provides-Extra: dev
|
|
43
|
+
Requires-Dist: build; extra == "dev"
|
|
44
|
+
Requires-Dist: twine; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-httpserver; extra == "dev"
|
|
46
|
+
Requires-Dist: werkzeug>=2.0.0; extra == "dev"
|
|
47
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
48
|
+
Requires-Dist: transformers>=4.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: pandas>=1.5.0; extra == "dev"
|
|
50
|
+
Requires-Dist: types-setuptools; extra == "dev"
|
|
51
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
52
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
53
|
+
Requires-Dist: types-docker; extra == "dev"
|
|
54
|
+
Requires-Dist: versioneer>=0.20; extra == "dev"
|
|
55
|
+
Requires-Dist: openai>=1.78.1; extra == "dev"
|
|
56
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
57
|
+
Requires-Dist: e2b; extra == "dev"
|
|
58
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
59
|
+
Requires-Dist: pytest-xdist; extra == "dev"
|
|
60
|
+
Requires-Dist: docker==7.1.0; extra == "dev"
|
|
61
|
+
Requires-Dist: ipykernel>=6.30.0; extra == "dev"
|
|
62
|
+
Requires-Dist: jupyter>=1.1.1; extra == "dev"
|
|
63
|
+
Requires-Dist: pip>=25.1.1; extra == "dev"
|
|
64
|
+
Requires-Dist: haikus==0.3.8; extra == "dev"
|
|
65
|
+
Requires-Dist: syrupy>=4.0.0; extra == "dev"
|
|
66
|
+
Requires-Dist: gymnasium>=1.2.0; extra == "dev"
|
|
67
|
+
Provides-Extra: trl
|
|
68
|
+
Requires-Dist: torch>=1.9; extra == "trl"
|
|
69
|
+
Requires-Dist: trl>=0.7.0; extra == "trl"
|
|
70
|
+
Requires-Dist: peft>=0.7.0; extra == "trl"
|
|
71
|
+
Requires-Dist: transformers>=4.0.0; extra == "trl"
|
|
72
|
+
Requires-Dist: accelerate>=0.28.0; extra == "trl"
|
|
73
|
+
Provides-Extra: openevals
|
|
74
|
+
Requires-Dist: openevals>=0.1.0; extra == "openevals"
|
|
75
|
+
Provides-Extra: fireworks
|
|
76
|
+
Requires-Dist: fireworks-ai>=0.19.19; extra == "fireworks"
|
|
77
|
+
Provides-Extra: box2d
|
|
78
|
+
Requires-Dist: swig; extra == "box2d"
|
|
79
|
+
Requires-Dist: gymnasium[box2d]>=0.29.0; extra == "box2d"
|
|
80
|
+
Requires-Dist: Pillow; extra == "box2d"
|
|
81
|
+
Provides-Extra: langfuse
|
|
82
|
+
Requires-Dist: langfuse>=2.0.0; extra == "langfuse"
|
|
83
|
+
Provides-Extra: huggingface
|
|
84
|
+
Requires-Dist: datasets>=3.0.0; extra == "huggingface"
|
|
85
|
+
Requires-Dist: transformers>=4.0.0; extra == "huggingface"
|
|
86
|
+
Provides-Extra: langsmith
|
|
87
|
+
Requires-Dist: langsmith>=0.1.86; extra == "langsmith"
|
|
88
|
+
Provides-Extra: bigquery
|
|
89
|
+
Requires-Dist: google-cloud-bigquery>=3.0.0; extra == "bigquery"
|
|
90
|
+
Requires-Dist: google-auth>=2.0.0; extra == "bigquery"
|
|
91
|
+
Provides-Extra: svgbench
|
|
92
|
+
Requires-Dist: selenium>=4.0.0; extra == "svgbench"
|
|
93
|
+
Provides-Extra: pydantic
|
|
94
|
+
Requires-Dist: pydantic-ai>=1.0.2; extra == "pydantic"
|
|
95
|
+
Provides-Extra: supabase
|
|
96
|
+
Requires-Dist: supabase>=2.18.1; extra == "supabase"
|
|
97
|
+
Provides-Extra: chinook
|
|
98
|
+
Requires-Dist: psycopg2-binary>=2.9.10; extra == "chinook"
|
|
99
|
+
Provides-Extra: langchain
|
|
100
|
+
Requires-Dist: langchain-core>=0.3.0; extra == "langchain"
|
|
101
|
+
Provides-Extra: braintrust
|
|
102
|
+
Requires-Dist: braintrust[otel]; extra == "braintrust"
|
|
103
|
+
Provides-Extra: langgraph
|
|
104
|
+
Requires-Dist: langgraph>=0.6.7; extra == "langgraph"
|
|
105
|
+
Requires-Dist: langchain-core>=0.3.75; extra == "langgraph"
|
|
106
|
+
Provides-Extra: langgraph-tools
|
|
107
|
+
Requires-Dist: langgraph>=0.6.7; extra == "langgraph-tools"
|
|
108
|
+
Requires-Dist: langchain>=0.3.0; extra == "langgraph-tools"
|
|
109
|
+
Requires-Dist: langchain-fireworks>=0.3.0; extra == "langgraph-tools"
|
|
110
|
+
Provides-Extra: proxy
|
|
111
|
+
Requires-Dist: redis>=5.0.0; extra == "proxy"
|
|
112
|
+
Requires-Dist: langfuse>=2.0.0; extra == "proxy"
|
|
113
|
+
Requires-Dist: uuid6>=2025.0.0; extra == "proxy"
|
|
114
|
+
Dynamic: license-file
|
|
115
|
+
|
|
116
|
+
# Eval Protocol
|
|
117
|
+
|
|
118
|
+
[![PyPI - Version](https://img.shields.io/pypi/v/eval-protocol)](https://pypi.org/project/eval-protocol/)
|
|
119
|
+
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/eval-protocol/python-sdk)
|
|
120
|
+
|
|
121
|
+
**Eval Protocol (EP) is an open solution for doing reinforcement learning fine-tuning on existing agents — across any language, container, or framework.**
|
|
122
|
+
|
|
123
|
+

|
|
124
|
+
|
|
125
|
+
Most teams already have complex agents running in production — often across remote services with heavy dependencies, Docker containers, or TypeScript backends deployed on Vercel. When they try to train or fine-tune these agents with reinforcement learning, connecting them to a trainer quickly becomes painful.
|
|
126
|
+
|
|
127
|
+
Eval Protocol makes connecting an agent to a trainer straightforward in two ways:
|
|
128
|
+
|
|
129
|
+
1. **Expose your agent through a simple API**
|
|
130
|
+
Wrap your existing agent (Python, TypeScript, Docker, etc.) in a simple HTTP service using EP’s rollout interface. EP handles the rollout orchestration, metadata passing, and trace storage automatically.
|
|
131
|
+
2. **Connect with any trainer**
|
|
132
|
+
Once your agent speaks the EP standard, it can be fine-tuned or evaluated with any supported trainer — Fireworks RFT, TRL, Unsloth, or your own — with no environment rewrites.
|
|
133
|
+
|
|
134
|
+
The result: RL that works out of the box for existing production agents.
|
|
135
|
+
|
|
136
|
+
## Who This Is For
|
|
137
|
+
|
|
138
|
+
- **Applied AI teams** adding RL to existing production agents.
|
|
139
|
+
- **Research engineers** experimenting with fine-tuning complex, multi-turn or tool-using agents.
|
|
140
|
+
- **MLOps teams** building reproducible, language-agnostic rollout pipelines.
|
|
141
|
+
|
|
142
|
+
## Quickstart
|
|
143
|
+
|
|
144
|
+
- See the Quickstart repository: [eval-protocol/quickstart](https://github.com/eval-protocol/quickstart/tree/main)
|
|
145
|
+
|
|
146
|
+
## Resources
|
|
147
|
+
|
|
148
|
+
- **[Documentation](https://evalprotocol.io)** – Guides and API reference
|
|
149
|
+
- **[Discord](https://discord.com/channels/1137072072808472616/1400975572405850155)** – Community
|
|
150
|
+
- **[GitHub](https://github.com/eval-protocol/python-sdk)** – Source and examples
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
|
|
154
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Eval Protocol
|
|
2
|
+
|
|
3
|
+
[![PyPI - Version](https://img.shields.io/pypi/v/eval-protocol)](https://pypi.org/project/eval-protocol/)
|
|
4
|
+
[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/eval-protocol/python-sdk)
|
|
5
|
+
|
|
6
|
+
**Eval Protocol (EP) is an open solution for doing reinforcement learning fine-tuning on existing agents — across any language, container, or framework.**
|
|
7
|
+
|
|
8
|
+

|
|
9
|
+
|
|
10
|
+
Most teams already have complex agents running in production — often across remote services with heavy dependencies, Docker containers, or TypeScript backends deployed on Vercel. When they try to train or fine-tune these agents with reinforcement learning, connecting them to a trainer quickly becomes painful.
|
|
11
|
+
|
|
12
|
+
Eval Protocol makes connecting an agent to a trainer straightforward in two ways:
|
|
13
|
+
|
|
14
|
+
1. **Expose your agent through a simple API**
|
|
15
|
+
Wrap your existing agent (Python, TypeScript, Docker, etc.) in a simple HTTP service using EP’s rollout interface. EP handles the rollout orchestration, metadata passing, and trace storage automatically.
|
|
16
|
+
2. **Connect with any trainer**
|
|
17
|
+
Once your agent speaks the EP standard, it can be fine-tuned or evaluated with any supported trainer — Fireworks RFT, TRL, Unsloth, or your own — with no environment rewrites.
|
|
18
|
+
|
|
19
|
+
The result: RL that works out of the box for existing production agents.
|
|
20
|
+
|
|
21
|
+
## Who This Is For
|
|
22
|
+
|
|
23
|
+
- **Applied AI teams** adding RL to existing production agents.
|
|
24
|
+
- **Research engineers** experimenting with fine-tuning complex, multi-turn or tool-using agents.
|
|
25
|
+
- **MLOps teams** building reproducible, language-agnostic rollout pipelines.
|
|
26
|
+
|
|
27
|
+
## Quickstart
|
|
28
|
+
|
|
29
|
+
- See the Quickstart repository: [eval-protocol/quickstart](https://github.com/eval-protocol/quickstart/tree/main)
|
|
30
|
+
|
|
31
|
+
## Resources
|
|
32
|
+
|
|
33
|
+
- **[Documentation](https://evalprotocol.io)** – Guides and API reference
|
|
34
|
+
- **[Discord](https://discord.com/channels/1137072072808472616/1400975572405850155)** – Community
|
|
35
|
+
- **[GitHub](https://github.com/eval-protocol/python-sdk)** – Source and examples
|
|
36
|
+
|
|
37
|
+
## License
|
|
38
|
+
|
|
39
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# This file makes the 'development' directory a Python package.
|