eval-protocol 0.2.80.dev2__py3-none-any.whl → 0.2.80.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eval_protocol/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-11-04T16:22:11-0800",
11
+ "date": "2025-11-05T01:57:48-0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "68c7a2df7942ab27516e2d6c12300cc824c999ed",
15
- "version": "0.2.80-dev2"
14
+ "full-revisionid": "6b9f1331a8ab25d0f057d3426a77849cb1b0a2f0",
15
+ "version": "0.2.80-dev4"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -46,7 +46,7 @@ def frozen_lake_to_evaluation_row(data: List[Dict[str, Any]]) -> List[Evaluation
46
46
  num_runs=1,
47
47
  max_concurrent_rollouts=3,
48
48
  mode="pointwise",
49
- server_script_path="examples/frozen_lake_mcp/server.py",
49
+ server_script_path="eval_protocol/mcp_servers/frozen_lake/server.py",
50
50
  )
51
51
  def test_frozen_lake_evaluation(row: EvaluationRow) -> EvaluationRow:
52
52
  """
@@ -704,11 +704,22 @@ def evaluation_test(
704
704
  )
705
705
  pytest_wrapper = pytest.mark.asyncio(pytest_wrapper)
706
706
 
707
+ ep_params: dict[str, Any] = {
708
+ "rollout_processor": rollout_processor,
709
+ "server_script_path": server_script_path,
710
+ "mcp_config_path": mcp_config_path,
711
+ "rollout_processor_kwargs": rollout_processor_kwargs,
712
+ "mode": mode,
713
+ }
714
+
715
+ print(f"ep_params: {ep_params}")
716
+
707
717
  # Create the dual mode wrapper
708
718
  dual_mode_wrapper = create_dual_mode_wrapper(
709
719
  test_func, mode, max_concurrent_rollouts, max_concurrent_evaluations, pytest_wrapper
710
720
  )
711
721
 
722
+ setattr(dual_mode_wrapper, "__ep_params__", ep_params)
712
723
  return dual_mode_wrapper # pyright: ignore[reportReturnType, reportUnknownVariableType]
713
724
 
714
725
  return decorator
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eval-protocol
3
- Version: 0.2.80.dev2
3
+ Version: 0.2.80.dev4
4
4
  Summary: The official Python SDK for Eval Protocol (EP.) EP is an open protocol that standardizes how developers author evals for large language model (LLM) applications.
5
5
  Author-email: Fireworks AI <info@fireworks.ai>
6
6
  License-Expression: MIT
@@ -5,7 +5,7 @@ development/utils/generate_api_key.py,sha256=hHCMFkzW4yxqwcn2ct5diDm-PR9cMX9XP7I
5
5
  development/utils/subprocess_manager.py,sha256=7n7rT9ji7h93i79SMrGS5RNesrnLFjdFON9_eQCmYNE,18937
6
6
  eval_protocol/__init__.py,sha256=I24OX8MFZBIXWeCh8Jhd8e5Qka8M4iXGKuXWme0hFD4,5150
7
7
  eval_protocol/__main__.py,sha256=FIW5fo2X2rsPThurSlZEuqvE1u0XNwJW1uoej_OhAAs,161
8
- eval_protocol/_version.py,sha256=Z1DVHr3ou0BAIC5WRc6WJzmt-b7JMkOrad17HxuG7xw,503
8
+ eval_protocol/_version.py,sha256=7grOGq0gDqO5M0v5p83ZJpv_yXRuuOYmjgrhD8nQMk0,503
9
9
  eval_protocol/auth.py,sha256=fKUYWw_UFOvmuMGra_ak-YYJXUoHo0I0_v_5xPy8S8Y,10215
10
10
  eval_protocol/cli.py,sha256=7DDyaTAACvlP7OG0OiPSFzGAYX4Rh_vYMJ743sMWSxs,27682
11
11
  eval_protocol/common_utils.py,sha256=2K7c2nMW7B3-wECbmAHBdZRIA1NjnUZIZnhCsNMRKu0,2883
@@ -60,7 +60,7 @@ eval_protocol/agent/resources/bfcl_envs/math_api.py,sha256=XcanEoJ8XFBQgR6_d9sdm
60
60
  eval_protocol/agent/resources/bfcl_envs/posting_api.py,sha256=uoeCByTF4kjLiwu6wYYdfiEn3ut_2WzRGjbAk21GrDM,5291
61
61
  eval_protocol/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
62
  eval_protocol/benchmarks/test_aime25.py,sha256=0ALzZt5EpQpvFx-kt99vVVUBMov2sL_K7Rh5rV3nAEo,4042
63
- eval_protocol/benchmarks/test_frozen_lake.py,sha256=iRK_6W9N9MWDkwUnSEJ8vy32_oYXVsTj4AsOMrIwMwA,2394
63
+ eval_protocol/benchmarks/test_frozen_lake.py,sha256=F_XrORo891q737WAXY_OM-LkYrDZRjMz0s12DAgf8yM,2407
64
64
  eval_protocol/benchmarks/test_gpqa.py,sha256=sPa08bw8X2w5Y_TxV32jujdyAJh-rThSaF1aq4YXhpQ,5414
65
65
  eval_protocol/benchmarks/test_livebench_data_analysis.py,sha256=p3m1dAlIRTpMqijpZ_S56KUQH_-1ex9Ge00TJeXBHiY,20097
66
66
  eval_protocol/benchmarks/test_tau_bench_airline.py,sha256=AM1Nll58F03AKF4LrFzPG0uyMZ-Q-eQqCq2cCiD8xnM,12164
@@ -173,7 +173,7 @@ eval_protocol/pytest/default_pydantic_ai_rollout_processor.py,sha256=L_gFvDNTKr9
173
173
  eval_protocol/pytest/default_single_turn_rollout_process.py,sha256=0zVICcrfP0LCFgVNRpzr-RSyGKK8EOETQgNASN3zjNM,7900
174
174
  eval_protocol/pytest/dual_mode_wrapper.py,sha256=wMRD0G4-GpFvh2ZaBxhcrRdNSxCWIi0RqhZ5eBJVMio,3969
175
175
  eval_protocol/pytest/elasticsearch_setup.py,sha256=NG9Qrn-w5NGpsQMjRgKDmhfhL1ANcCuVjSSLJo8OTzY,7061
176
- eval_protocol/pytest/evaluation_test.py,sha256=1tHFM34mJdt0lt2xTruSSPD6frcRddbraD-X8BIBb6s,40074
176
+ eval_protocol/pytest/evaluation_test.py,sha256=ZcP1m8b5ClAs3j6uI3YKbhEBCKBqZDZiE6ray3xXLQg,40474
177
177
  eval_protocol/pytest/evaluation_test_postprocess.py,sha256=07WSF2j1zk8ug3zCqnpg0Ve1K3rClGqljDHXHA3BinQ,9596
178
178
  eval_protocol/pytest/evaluation_test_utils.py,sha256=QcXTyQKpIP4PxQgC2Bd3m-p3WRuiF6sGDuO44UYAl34,20302
179
179
  eval_protocol/pytest/exception_config.py,sha256=ZDduPfjAC99Xs0d3sqzCBS6otfFbVWcLDGZQvyrq3oo,4947
@@ -248,7 +248,7 @@ eval_protocol/utils/show_results_url.py,sha256=PHM6dWtCUiuV5WQgvHegnxY7ofkE4b9wO
248
248
  eval_protocol/utils/static_policy.py,sha256=fiKnOS06EG5OB6p5An_yY_dLAvVboYnC4Sqx5z_v3-g,10716
249
249
  eval_protocol/utils/subprocess_utils.py,sha256=2EcoVNLSlfdxwQn-2pscqjiGpBR4Ho8kfRnmzmew-1w,3504
250
250
  eval_protocol/utils/vite_server.py,sha256=0Tfh1LfTqYpFZxkO2syrF5I0cBEJHFmYXd2N4CWkca8,5051
251
- eval_protocol-0.2.80.dev2.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
251
+ eval_protocol-0.2.80.dev4.dist-info/licenses/LICENSE,sha256=OzeIb507xW9AVhGMqqHpoL_EFRJUo8Sb7A3LN5NqFfQ,1075
252
252
  vendor/tau2/__init__.py,sha256=EQMX_v8x-YBV24ia35_nLkf5MrC6aAuT_M5m7IJcl3k,541
253
253
  vendor/tau2/cli.py,sha256=lhJocXCDxEfdv7gIxya5b0w5J5qebpgrg_ZTpjGp_ww,7515
254
254
  vendor/tau2/config.py,sha256=LrkKRGSFH4Cvf9CNO-MttJMvIia0a2zP1uKVnUQi6B8,1278
@@ -348,8 +348,8 @@ vite-app/dist/assets/index-BGlGI2LH.css,sha256=xsnvbJ70EzoAdWwqDe2frphbsELuQdyGF
348
348
  vite-app/dist/assets/index-CnGlFAnP.js,sha256=VYfX3a8Kx7tXmfCYk12YfCGUjX3XI7WIliD74xpIclg,868987
349
349
  vite-app/dist/assets/index-CnGlFAnP.js.map,sha256=UFXiELpTt9H1hObSUF14I6aa53euAw9LzZaW_MqLsb4,3869740
350
350
  vite-app/dist/assets/logo-light-BprIBJQW.png,sha256=rRXC24eqrQO3y--N493THrD48WQVAhSVMHM_iDKy250,21694
351
- eval_protocol-0.2.80.dev2.dist-info/METADATA,sha256=QMKQy1kfa8pIHnzCmunKFEUWo6XXl41qWl6Ot2fzQYY,7664
352
- eval_protocol-0.2.80.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
353
- eval_protocol-0.2.80.dev2.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
354
- eval_protocol-0.2.80.dev2.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
355
- eval_protocol-0.2.80.dev2.dist-info/RECORD,,
351
+ eval_protocol-0.2.80.dev4.dist-info/METADATA,sha256=LhN588CPYMxfKkqFutbejtpjgsF5msOjoh7rZc2951A,7664
352
+ eval_protocol-0.2.80.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
353
+ eval_protocol-0.2.80.dev4.dist-info/entry_points.txt,sha256=CebRaxbWXly21zPN1fbyAw26kNUU2dv7zZyGkXxtFVw,183
354
+ eval_protocol-0.2.80.dev4.dist-info/top_level.txt,sha256=8jjn7dpvLPL4RX2JBeAfPPMOR6x6f7E4o4yFiKLEHuw,33
355
+ eval_protocol-0.2.80.dev4.dist-info/RECORD,,