eval-protocol 0.2.80.dev2__py3-none-any.whl → 0.2.80.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_protocol/_version.py +3 -3
- eval_protocol/benchmarks/test_frozen_lake.py +1 -1
- eval_protocol/pytest/evaluation_test.py +11 -0
- {eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/METADATA +1 -1
- {eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/RECORD +9 -9
- {eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/WHEEL +0 -0
- {eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/entry_points.txt +0 -0
- {eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/licenses/LICENSE +0 -0
- {eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/top_level.txt +0 -0
eval_protocol/_version.py
CHANGED
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2025-11-
|
|
11
|
+
"date": "2025-11-05T01:57:48-0800",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "0.2.80-dev2"
|
|
14
|
+
"full-revisionid": "6b9f1331a8ab25d0f057d3426a77849cb1b0a2f0",
|
|
15
|
+
"version": "0.2.80-dev4"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
eval_protocol/benchmarks/test_frozen_lake.py
CHANGED
|
@@ -46,7 +46,7 @@ def frozen_lake_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evaluation
|
|
|
46
46
|
num_runs=1,
|
|
47
47
|
max_concurrent_rollouts=3,
|
|
48
48
|
mode="pointwise",
|
|
49
|
-
server_script_path="
|
|
49
|
+
server_script_path="eval_protocol/mcp_servers/frozen_lake/server.py",
|
|
50
50
|
)
|
|
51
51
|
def test_frozen_lake_evaluation(row: EvaluationRow) -> EvaluationRow:
|
|
52
52
|
"""
|
|
eval_protocol/pytest/evaluation_test.py
CHANGED
|
@@ -704,11 +704,22 @@ def evaluation_test(
|
|
|
704
704
|
)
|
|
705
705
|
pytest_wrapper = pytest.mark.asyncio(pytest_wrapper)
|
|
706
706
|
|
|
707
|
+
ep_params: dict[str, Any] = {
|
|
708
|
+
"rollout_processor": rollout_processor,
|
|
709
|
+
"server_script_path": server_script_path,
|
|
710
|
+
"mcp_config_path": mcp_config_path,
|
|
711
|
+
"rollout_processor_kwargs": rollout_processor_kwargs,
|
|
712
|
+
"mode": mode,
|
|
713
|
+
}
|
|
714
|
+
|
|
715
|
+
print(f"ep_params: {ep_params}")
|
|
716
|
+
|
|
707
717
|
# Create the dual mode wrapper
|
|
708
718
|
dual_mode_wrapper = create_dual_mode_wrapper(
|
|
709
719
|
test_func, mode, max_concurrent_rollouts, max_concurrent_evaluations, pytest_wrapper
|
|
710
720
|
)
|
|
711
721
|
|
|
722
|
+
setattr(dual_mode_wrapper, "__ep_params__", ep_params)
|
|
712
723
|
return dual_mode_wrapper # pyright: ignore[reportReturnType, reportUnknownVariableType]
|
|
713
724
|
|
|
714
725
|
return decorator
|
|
{eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-protocol
|
|
3
|
-
Version: 0.2.80.dev2
|
|
3
|
+
Version: 0.2.80.dev4
|
|
4
4
|
Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
|
|
5
5
|
Author-email: Fireworks AI <info@fireworks.ai>
|
|
6
6
|
License-Expression: MIT
|
|
{eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/RECORD
CHANGED
|
@@ -5,7 +5,7 @@ development/utils/generate_api_key.py,sha256=hHCMFkzW4yxqwcn2ct5diDm-PR9cMX9XP7I
|
|
|
5
5
|
development/utils/subprocess_manager.py,sha256=7n7rT9ji7h93i79SMrGS5RNesrnLFjdFON9_eQCmYNE,18937
|
|
6
6
|
eval_protocol/__init__.py,sha256=I24OX8MFZBIXWeCh8Jhd8e5Qka8M4iXGKuXWme0hFD4,5150
|
|
7
7
|
eval_protocol/__main__.py,sha256=FIW5fo2X2rsPThurSlZEuqvE1u0XNwJW1uoej_OhAAs,161
|
|
8
|
-
eval_protocol/_version.py,sha256=
|
|
8
|
+
eval_protocol/_version.py,sha256=7grOGq0gDqO5M0v5p83ZJpv_yXRuuOYmjgrhD8nQMk0,503
|
|
9
9
|
eval_protocol/auth.py,sha256=fKUYWw_UFOvmuMGra_ak-YYJXUoHo0I0_v_5xPy8S8Y,10215
|
|
10
10
|
eval_protocol/cli.py,sha256=7DDyaTAACvlP7OG0OiPSFzGAYX4Rh_vYMJ743sMWSxs,27682
|
|
11
11
|
eval_protocol/common_utils.py,sha256=2K7c2nMW7B3-wECbmAHBdZRIA1NjnUZIZnhCsNMRKu0,2883
|
|
@@ -60,7 +60,7 @@ eval_protocol/agent/resources/bfcl_envs/math_api.py,sha256=XcanEoJ8XFBQgR6_d9sdm
|
|
|
60
60
|
eval_protocol/agent/resources/bfcl_envs/posting_api.py,sha256=uoeCByTF4kjLiwu6wYYdfiEn3ut_2WzRGjbAk21GrDM,5291
|
|
61
61
|
eval_protocol/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
62
|
eval_protocol/benchmarks/test_aime25.py,sha256=0ALzZt5EpQpvFx-kt99vVVUBMov2sL_K7Rh5rV3nAEo,4042
|
|
63
|
-
eval_protocol/benchmarks/test_frozen_lake.py,sha256=
|
|
63
|
+
eval_protocol/benchmarks/test_frozen_lake.py,sha256=F_XrORo891q737WAXY_OM-LkYrDZRjMz0s12DAgf8yM,2407
|
|
64
64
|
eval_protocol/benchmarks/test_gpqa.py,sha256=sPa08bw8X2w5Y_TxV32jujdyAJh-rThSaF1aq4YXhpQ,5414
|
|
65
65
|
eval_protocol/benchmarks/test_livebench_data_analysis.py,sha256=p3m1dAlIRTpMqijpZ_S56KUQH_-1ex9Ge00TJeXBHiY,20097
|
|
66
66
|
eval_protocol/benchmarks/test_tau_bench_airline.py,sha256=AM1Nll58F03AKF4LrFzPG0uyMZ-Q-eQqCq2cCiD8xnM,12164
|
|
@@ -173,7 +173,7 @@ eval_protocol/pytest/default_pydantic_ai_rollout_processor.py,sha256=L_gFvDNTKr9
|
|
|
173
173
|
eval_protocol/pytest/default_single_turn_rollout_process.py,sha256=0zVICcrfP0LCFgVNRpzr-RSyGKK8EOETQgNASN3zjNM,7900
|
|
174
174
|
eval_protocol/pytest/dual_mode_wrapper.py,sha256=wMRD0G4-GpFvh2ZaBxhcrRdNSxCWIi0RqhZ5eBJVMio,3969
|
|
175
175
|
eval_protocol/pytest/elasticsearch_setup.py,sha256=NG9Qrn-w5NGpsQMjRgKDmhfhL1ANcCuVjSSLJo8OTzY,7061
|
|
176
|
-
eval_protocol/pytest/evaluation_test.py,sha256=
|
|
176
|
+
eval_protocol/pytest/evaluation_test.py,sha256=ZcP1m8b5ClAs3j6uI3YKbhEBCKBqZDZiE6ray3xXLQg,40474
|
|
177
177
|
eval_protocol/pytest/evaluation_test_postprocess.py,sha256=07WSF2j1zk8ug3zCqnpg0Ve1K3rClGqljDHXHA3BinQ,9596
|
|
178
178
|
eval_protocol/pytest/evaluation_test_utils.py,sha256=QcXTyQKpIP4PxQgC2Bd3m-p3WRuiF6sGDuO44UYAl34,20302
|
|
179
179
|
eval_protocol/pytest/exception_config.py,sha256=ZDduPfjAC99Xs0d3sqzCBS6otfFbVWcLDGZQvyrq3oo,4947
|
|
@@ -248,7 +248,7 @@ eval_protocol/utils/show_results_url.py,sha256=PHM6dWtCUiuV5WQgvHegnxY7ofkE4b9wO
|
|
|
248
248
|
eval_protocol/utils/static_policy.py,sha256=fiKnOS06EG5OB6p5An_yY_dLAvVboYnC4Sqx5z_v3-g,10716
|
|
249
249
|
eval_protocol/utils/subprocess_utils.py,sha256=2EcoVNLSlfdxwQn-2pscqjiGpBR4Ho8kfRnmzmew-1w,3504
|
|
250
250
|
eval_protocol/utils/vite_server.py,sha256=0Tfh1LfTqYpFZxkO2syrF5I0cBEJHFmYXd2N4CWkca8,5051
|
|
251
|
-
eval_protocol-0.2.80.dev2.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
|
|
251
|
+
eval_protocol-0.2.80.dev4.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
|
|
252
252
|
vendor/tau2/__init__.py,sha256=EQMX_v8x-YBV24ia35_nLkf5MrC6aAuT_M5m7IJcl3k,541
|
|
253
253
|
vendor/tau2/cli.py,sha256=lhJocXCDxEfdv7gIxya5b0w5J5qebpgrg_ZTpjGp_ww,7515
|
|
254
254
|
vendor/tau2/config.py,sha256=LrkKRGSFH4Cvf9CNO-MttJMvIia0a2zP1uKVnUQi6B8,1278
|
|
@@ -348,8 +348,8 @@ vite-app/dist/assets/index-BGlGI2LH.css,sha256=xsnvbJ70EzoAdWwqDe2frphbsELuQdyGF
|
|
|
348
348
|
vite-app/dist/assets/index-CnGlFAnP.js,sha256=VYfX3a8Kx7tXmfCYk12YfCGUjX3XI7WIliD74xpIclg,868987
|
|
349
349
|
vite-app/dist/assets/index-CnGlFAnP.js.map,sha256=UFXiELpTt9H1hObSUF14I6aa53euAw9LzZaW_MqLsb4,3869740
|
|
350
350
|
vite-app/dist/assets/logo-light-BprIBJQW.png,sha256=rRXC24eqrQO3y--N493THrD48WQVAhSVMHM_iDKy250,21694
|
|
351
|
-
eval_protocol-0.2.80.
|
|
352
|
-
eval_protocol-0.2.80.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
353
|
-
eval_protocol-0.2.80.dev2.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
|
|
354
|
-
eval_protocol-0.2.80.dev2.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
|
|
355
|
-
eval_protocol-0.2.80.dev2.dist-info/RECORD,,
|
|
351
|
+
eval_protocol-0.2.80.dev4.dist-info/METADATA,sha256=LhN588CPYMxfKkqFutbejtpjgsF5msOjoh7rZc2951A,7664
|
|
352
|
+
eval_protocol-0.2.80.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
353
|
+
eval_protocol-0.2.80.dev4.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
|
|
354
|
+
eval_protocol-0.2.80.dev4.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
|
|
355
|
+
eval_protocol-0.2.80.dev4.dist-info/RECORD,,
|
|
File without changes
|
{eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{eval_protocol-0.2.80.dev2.dist-info → eval_protocol-0.2.80.dev4.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|