crfm-helm 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (121) hide show
  1. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +3 -1
  2. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +117 -115
  3. helm/benchmark/adaptation/adapter_spec.py +5 -0
  4. helm/benchmark/metrics/bbq_metrics.py +12 -0
  5. helm/benchmark/metrics/evaluate_reference_metrics.py +12 -0
  6. helm/benchmark/metrics/safety_metrics.py +13 -1
  7. helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
  8. helm/benchmark/presentation/run_display.py +13 -3
  9. helm/benchmark/presentation/run_entry.py +2 -2
  10. helm/benchmark/run.py +1 -1
  11. helm/benchmark/run_specs/arabic_run_specs.py +6 -0
  12. helm/benchmark/run_specs/medhelm_run_specs.py +2 -2
  13. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
  14. helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
  15. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
  16. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
  17. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
  18. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
  19. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
  20. helm/benchmark/scenarios/banking77_scenario.py +21 -0
  21. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  22. helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
  23. helm/benchmark/scenarios/commonsense_scenario.py +7 -1
  24. helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
  25. helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
  26. helm/benchmark/scenarios/financebench_scenario.py +21 -0
  27. helm/benchmark/scenarios/gsm_scenario.py +9 -3
  28. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
  29. helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
  30. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
  31. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
  32. helm/benchmark/scenarios/legalbench_scenario.py +6 -7
  33. helm/benchmark/scenarios/math_scenario.py +11 -4
  34. helm/benchmark/scenarios/med_qa_scenario.py +7 -1
  35. helm/benchmark/scenarios/medi_qa_scenario.py +2 -2
  36. helm/benchmark/scenarios/mmlu_scenario.py +8 -2
  37. helm/benchmark/scenarios/narrativeqa_scenario.py +3 -4
  38. helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
  39. helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
  40. helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
  41. helm/benchmark/scenarios/spider_scenario.py +18 -0
  42. helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
  43. helm/benchmark/scenarios/wmt_14_scenario.py +9 -2
  44. helm/benchmark/static/schema_long_context.yaml +12 -31
  45. helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
  46. helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
  47. helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
  48. helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
  49. helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
  50. helm/benchmark/static_build/index.html +5 -6
  51. helm/clients/ai21_client.py +2 -0
  52. helm/clients/aleph_alpha_client.py +2 -0
  53. helm/clients/anthropic_client.py +7 -1
  54. helm/clients/audio_language/diva_llama_client.py +2 -0
  55. helm/clients/audio_language/llama_omni_client.py +2 -1
  56. helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
  57. helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
  58. helm/clients/audio_language/qwen_audiolm_client.py +2 -1
  59. helm/clients/bedrock_client.py +2 -0
  60. helm/clients/cohere_client.py +3 -0
  61. helm/clients/google_client.py +2 -0
  62. helm/clients/http_model_client.py +2 -0
  63. helm/clients/huggingface_client.py +2 -1
  64. helm/clients/ibm_client.py +3 -1
  65. helm/clients/image_generation/adobe_vision_client.py +2 -0
  66. helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
  67. helm/clients/image_generation/cogview2_client.py +2 -1
  68. helm/clients/image_generation/dalle2_client.py +2 -0
  69. helm/clients/image_generation/dalle_mini_client.py +2 -1
  70. helm/clients/image_generation/deep_floyd_client.py +2 -0
  71. helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
  72. helm/clients/image_generation/lexica_client.py +2 -0
  73. helm/clients/image_generation/mindalle_client.py +2 -1
  74. helm/clients/image_generation/together_image_generation_client.py +2 -0
  75. helm/clients/megatron_client.py +2 -0
  76. helm/clients/mistral_client.py +2 -0
  77. helm/clients/moderation_api_client.py +2 -0
  78. helm/clients/openai_client.py +5 -1
  79. helm/clients/palmyra_client.py +2 -1
  80. helm/clients/reka_client.py +2 -1
  81. helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
  82. helm/clients/stanfordhealthcare_http_model_client.py +2 -0
  83. helm/clients/together_client.py +4 -0
  84. helm/clients/vertexai_client.py +4 -0
  85. helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
  86. helm/clients/vision_language/huggingface_vlm_client.py +2 -0
  87. helm/clients/vision_language/idefics_client.py +2 -1
  88. helm/clients/vision_language/open_flamingo_client.py +2 -1
  89. helm/clients/vision_language/paligemma_client.py +2 -1
  90. helm/clients/vision_language/palmyra_vision_client.py +2 -0
  91. helm/clients/vision_language/qwen2_vlm_client.py +2 -1
  92. helm/clients/vision_language/qwen_vlm_client.py +2 -1
  93. helm/clients/writer_client.py +2 -0
  94. helm/common/hierarchical_logger.py +20 -0
  95. helm/common/optional_dependencies.py +1 -1
  96. helm/common/test_general.py +4 -0
  97. helm/config/model_deployments.yaml +225 -0
  98. helm/config/model_metadata.yaml +232 -7
  99. helm/config/tokenizer_configs.yaml +74 -4
  100. helm/benchmark/static_build/assets/index-671a5e06.js +0 -10
  101. helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
  102. helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
  103. helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
  104. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
  105. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
  106. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
  107. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
  108. /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
  109. /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
  110. /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
  111. /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
  112. /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
  113. /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
  114. /helm/benchmark/static_build/assets/{index-9352595e.css → index-oIeiQW2g.css} +0 -0
  115. /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
  116. /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
  117. /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
  118. /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
  119. /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
  120. /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
  121. /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crfm-helm
3
- Version: 0.5.8
3
+ Version: 0.5.9
4
4
  Summary: Benchmark for language models
5
5
  Author-email: Stanford CRFM <contact-crfm@stanford.edu>
6
6
  License: Apache License 2.0
@@ -187,6 +187,7 @@ Requires-Dist: google-cloud-storage~=2.9; extra == "heim"
187
187
  Requires-Dist: lpips~=0.1.4; extra == "heim"
188
188
  Requires-Dist: multilingual-clip~=1.0; extra == "heim"
189
189
  Requires-Dist: NudeNet~=2.0; extra == "heim"
190
+ Requires-Dist: numpy<2,>=1.26; extra == "heim"
190
191
  Requires-Dist: opencv-python<4.8.2.0,>=4.7.0.68; python_version >= "3.10" and extra == "heim"
191
192
  Requires-Dist: opencv-python-headless<=4.11.0.86,>=4.7.0.68; python_version < "3.10" and extra == "heim"
192
193
  Requires-Dist: pytorch-fid~=0.3.0; extra == "heim"
@@ -341,6 +342,7 @@ The HELM framework was used in the following papers for evaluating models.
341
342
  - **The Mighty ToRR: A Benchmark for Table Reasoning and Robustness** - [paper](https://arxiv.org/abs/2502.19412), [leaderboard](https://crfm.stanford.edu/helm/torr/latest/)
342
343
  - **Reliable and Efficient Amortized Model-based Evaluation** - [paper](https://arxiv.org/abs/2503.13335), [documentation](https://crfm-helm.readthedocs.io/en/latest/reeval/)
343
344
  - **MedHELM** - paper in progress, [leaderboard](https://crfm.stanford.edu/helm/medhelm/latest/), [documentation](https://crfm-helm.readthedocs.io/en/latest/reeval/)
345
+ - **Holistic Evaluation of Audio-Language Models** - [paper](https://arxiv.org/abs/2508.21376), [leaderboard](https://crfm.stanford.edu/helm/audio/latest/)
344
346
 
345
347
  The HELM framework can be used to reproduce the published model evaluation results from these papers. To get started, refer to the documentation links above for the corresponding paper, or the [main Reproducing Leaderboards documentation](https://crfm-helm.readthedocs.io/en/latest/reproducing_leaderboards/).
346
348
 
@@ -1,4 +1,4 @@
1
- crfm_helm-0.5.8.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
1
+ crfm_helm-0.5.9.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
2
2
  helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,7 +12,7 @@ helm/benchmark/model_metadata_registry.py,sha256=7XisV0an_edM8hvP8LSoCnTeUN2QLJr
12
12
  helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
13
13
  helm/benchmark/reeval_run.py,sha256=vImL8JNhveEOftZbRQ6JAxF0L-XCKIwh65M6fIYo4RU,7198
14
14
  helm/benchmark/reeval_runner.py,sha256=bJPl7XVOVwK2fUA7voOVQYwVFEOfKVnrT2tbSGQzQY8,15584
15
- helm/benchmark/run.py,sha256=ZyqkKnqkMqM2AH4HL6sH72H8-mrDWu0NW0piE7BY0HM,13973
15
+ helm/benchmark/run.py,sha256=n6FYrAOkdCLVzPK-HqNx0MWkdqazviUVqMBAVcpSUUk,14004
16
16
  helm/benchmark/run_expander.py,sha256=IMPhg16Yd3diaFRLGYcLCXGO4L_B2WXW69oZP0fx6lE,56857
17
17
  helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
18
18
  helm/benchmark/run_spec_factory.py,sha256=Hxeft3fXoWNz9yGo-2nIfb5pd3GDWlwYWc6YYvAkTjM,7785
@@ -25,7 +25,7 @@ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5
25
25
  helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
26
26
  helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
27
27
  helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- helm/benchmark/adaptation/adapter_spec.py,sha256=mfqU5lkvN2UOOUrldgTNq_u8iqRajagvzimyGWQhPQs,6054
28
+ helm/benchmark/adaptation/adapter_spec.py,sha256=ONFbOdQiVbffP48_VonC3QgkwtJVObfG7j4wsCNGiJQ,6407
29
29
  helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
30
30
  helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
31
31
  helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
@@ -131,7 +131,7 @@ helm/benchmark/metrics/air_bench_metrics.py,sha256=WvfjjHLSE567Y7BC8tGlMINBwP-d1
131
131
  helm/benchmark/metrics/alrage_metric.py,sha256=4QHtL00aEIRYQx2QkDs5uldu7ZAkbFYMALH6DL9LSJg,1233
132
132
  helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
133
133
  helm/benchmark/metrics/basic_metrics.py,sha256=3y1M0mFJL8FlkMkQWWs4ZV2NiriaMGydddbeY3F-vXk,30547
134
- helm/benchmark/metrics/bbq_metrics.py,sha256=GeZhSSJzqGD0e5EAiRHitIC3XtPICF7rDI6GfeYQc8E,6201
134
+ helm/benchmark/metrics/bbq_metrics.py,sha256=oHd4U6Q5sv2h0UtVnAJ2_cf32XiISWaDvc-2y0fU-gk,6574
135
135
  helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
136
136
  helm/benchmark/metrics/bias_word_lists.py,sha256=eyk6we2J4SW8ZaZxQUWLB7Yapn92uM5TCekhFB5vg-U,13908
137
137
  helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
@@ -161,7 +161,7 @@ helm/benchmark/metrics/dry_run_metrics.py,sha256=ouS6_8lESuCGSQgegN4xKKyoGr7Rb1K
161
161
  helm/benchmark/metrics/efficiency_metrics.py,sha256=VnM5PgxxK6UKk9MzPprnN_7d-t6xVlIgFMQYrFh8dwY,15262
162
162
  helm/benchmark/metrics/ehr_sql_metrics.py,sha256=yyz-2tsk4Fu6D5ELp3cbLaAWGjqtDGrUdvFvgHvxevg,7418
163
163
  helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
164
- helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=T1AUnN1wYFrTBMLyys3AbvlArIenZwCPwHa_F7J9ODg,31476
164
+ helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=kVYKCFX6LiG8ucA12Ib3RAkDd1kFaSONEtvgIatvIrE,31884
165
165
  helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
166
166
  helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
167
167
  helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=Lkil9DRtO3NS3zr5Ef_qqGxZBL-ObCNpbKoJvMhCrb8,4762
@@ -199,7 +199,7 @@ helm/benchmark/metrics/ranking_metrics.py,sha256=hSNKy4h7zRkGYSgo6RWt4PXQztA5ZX1
199
199
  helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9_Gywuvb1r9FLE,6092
200
200
  helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
201
201
  helm/benchmark/metrics/ruler_qa_metrics.py,sha256=OuiA0ksByl0Tw1Oal7zbedhKjTrhJgQJDLXAgoTLXuc,1473
202
- helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
202
+ helm/benchmark/metrics/safety_metrics.py,sha256=PZjyNsxiBe4VTdIujsqrLUtsQfLUpcm8snlAk3g9zWA,3870
203
203
  helm/benchmark/metrics/seahelm_metrics.py,sha256=GlNoK1O7kcuiuEOJEgTsnrfK9TcGwH7-tPj6Qe6JV90,7493
204
204
  helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
205
205
  helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
@@ -214,6 +214,7 @@ helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLp
214
214
  helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
215
215
  helm/benchmark/metrics/toxicity_metrics.py,sha256=s5Ypodu4cBmIc_fCbbQ9kCqcvVJf-OQ6zAvb85r8Cv8,5509
216
216
  helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
217
+ helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py,sha256=dSJXAS7--n2sxRaajDo20Omzwx4LY5x0gd8nTxX3DAE,2317
217
218
  helm/benchmark/metrics/unitxt_metrics.py,sha256=8fawxnrg0xsAe0xO2wbL7S_yisj8RzJnrn6xtk8C6q8,4852
218
219
  helm/benchmark/metrics/wildbench_metrics.py,sha256=THOguxE6GUun0zTr-pITXfQGEd664sScrfIzFGdNPXk,2163
219
220
  helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -275,8 +276,8 @@ helm/benchmark/metrics/vision_language/image_utils.py,sha256=xwtydR8-s23cJacIGXD
275
276
  helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
276
277
  helm/benchmark/presentation/contamination.py,sha256=07IuIP92vfuI0GwfeNC-i_NZUlF8N1azzagC19YHOMQ,2802
277
278
  helm/benchmark/presentation/create_plots.py,sha256=bM6UNzH0Bx8Bv2iKcyMoYp7IwfCZSQob-w_XOOI6r1M,29090
278
- helm/benchmark/presentation/run_display.py,sha256=LmY2HES4dU94kRYuUxt-c9LTMDN6MU5CspWTF6rZwDo,12419
279
- helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
279
+ helm/benchmark/presentation/run_display.py,sha256=byOcVknL6UgwSBGWUPiWEdSBRbW6PYwmo7vJ1Ms50iY,12890
280
+ helm/benchmark/presentation/run_entry.py,sha256=_hgsKMpZ-WpgaK7nta68GohXe07JCyaWD6jRjINujXk,1182
280
281
  helm/benchmark/presentation/schema.py,sha256=AMGmEwqxkHoZFkOKD-UVZ8aXwgbafG6KYASsWo6YEw8,11005
281
282
  helm/benchmark/presentation/summarize.py,sha256=m3RSw6ogUFasdeZ8xSUh4wKV-nYzVi3iQv-KrrwtDFM,67828
282
283
  helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
@@ -289,7 +290,7 @@ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9i
289
290
  helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
290
291
  helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
291
292
  helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
292
- helm/benchmark/run_specs/arabic_run_specs.py,sha256=fPAI9GCV_D0BHPcLGSNZN45sAO2d449Gb54iHW1nocc,7399
293
+ helm/benchmark/run_specs/arabic_run_specs.py,sha256=x3pBNbUcYfx6f0APXroLBQodOgv6oWuJNb301c_QUhg,7768
293
294
  helm/benchmark/run_specs/audio_run_specs.py,sha256=baJz5LZiwWZP3KD0hluKgpidtswzdorQnshX0CoqKAc,23383
294
295
  helm/benchmark/run_specs/bluex_run_specs.py,sha256=jwrH33YeXqoAex11071XMUwTCKNkoJTQQS7iNoJDLmg,1797
295
296
  helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
@@ -309,7 +310,7 @@ helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMY
309
310
  helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
310
311
  helm/benchmark/run_specs/lmkt_run_specs.py,sha256=tNZvlA4mXUX-NBC9enRR90qFLeh8SNGFq701rXmXc18,5376
311
312
  helm/benchmark/run_specs/long_context_run_specs.py,sha256=wn7yY9rMIBJY30SN-275qg9U49aGPUl4hVZphKYFkBI,6442
312
- helm/benchmark/run_specs/medhelm_run_specs.py,sha256=bi7sGIx5I7KQXAF_Uj6n_O_DFNgtc496unrVh7UuLcQ,53256
313
+ helm/benchmark/run_specs/medhelm_run_specs.py,sha256=CGFHmoQB58gpqi4b4BbffuHzRBs12aeq8suUkaaFbqc,53262
313
314
  helm/benchmark/run_specs/melt_run_specs.py,sha256=729MkALud2wG07yulx9zqAzejdXW_eVGkfF5cQWeGGY,32031
314
315
  helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
315
316
  helm/benchmark/run_specs/multilingual_run_specs.py,sha256=umf8e6ZDgRXiU0G_BPoovj1UZ_dxyrXtIQ7i9WC6USg,2296
@@ -317,7 +318,7 @@ helm/benchmark/run_specs/oab_exams_specs.py,sha256=ws7Vppo_zJvxKqQ_sNhm9N7-5eQbX
317
318
  helm/benchmark/run_specs/safety_run_specs.py,sha256=3X6tYaq2SlRsZs9q6SCtBUgjNEpOwUtV6M7iY2Kowm0,6807
318
319
  helm/benchmark/run_specs/seahelm_run_specs.py,sha256=R3mg4_OoaRizZ5n0FHcUQpJLny3j-ulBlHzOyF0a0Ok,23904
319
320
  helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
320
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=P1mxSu7ErjiK0ARbRmbIzFwYO3fC-6QpsZQeez4U3qI,7346
321
+ helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=Hx0BxdzORXU8cyEGFYJJWs60Ssuny6tIpWqCR6fFSfI,7464
321
322
  helm/benchmark/run_specs/sql_run_specs.py,sha256=JWCICELKi81m11MggyR6CJNl3vpWPwk4kr8DZSsWvj4,1965
322
323
  helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak52ycvu8DAnhabw9rc7yA,1129
323
324
  helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
@@ -331,19 +332,19 @@ helm/benchmark/scenarios/air_bench_scenario.py,sha256=Ufcpxm5KaXHI2FfK4tdQsURaCS
331
332
  helm/benchmark/scenarios/alghafa_scenario.py,sha256=FJXO3W6qYzCgLJMSiJEhpddNcFyR3N5Brh8pATW_9GM,5217
332
333
  helm/benchmark/scenarios/alrage_scenario.py,sha256=MN-gMQboAaJCasYNg_rLJVgcrk5KZ1WCBN9R_lyRrhE,1499
333
334
  helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=EzS8td1lJE1yxEwFtuwTbjHtHm1hGIaur93BKAL_Hm4,6212
334
- helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=_OWE33eVRaZI0gmfP7bLd572uOi_6jb39z_J6nkcvfg,3182
335
+ helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=3axwlXnKI0-mRDaYfD-hcCNZwtw_1jte_5THuyuV7Ok,3683
335
336
  helm/benchmark/scenarios/arabic_exams_scenario.py,sha256=hv28A2pM66ejrO6oFOgmCx3JIP_nqwdUYvIsfGc0Kew,5359
336
337
  helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=xMRWPA16Wn8ONgAeyyHOB95X2SQca7tKUpUP8L5ZNJc,3018
337
338
  helm/benchmark/scenarios/aratrust_scenario.py,sha256=ismiWLm1M6JmBgVZ0SoVglaOyFbAlyOHsSsiAv8Np8Y,3125
338
339
  helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=fOCHumFWZa4OJZcTZefJiJbdWsb3zjQnWLJYd10Cctw,2496
339
340
  helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
340
341
  helm/benchmark/scenarios/babi_qa_scenario.py,sha256=CAmh3GfFjB9Xsuh9K-PUu-2xIFTV0v0YNgWbSuv09Y0,5711
341
- helm/benchmark/scenarios/banking77_scenario.py,sha256=dtiM-Q_pMDWqkLi-hgl0tH-aGuDdgHkXgweE1JqrPYs,1883
342
- helm/benchmark/scenarios/bbq_scenario.py,sha256=mVfxztgLI9sFwOYntx0dxElm8RmOb7XQYS9DOfgYjkI,10360
342
+ helm/benchmark/scenarios/banking77_scenario.py,sha256=cYM5Itksjy-tufjC9mmIKy_FG0kqPuDkDIhPhDZUX2I,2773
343
+ helm/benchmark/scenarios/bbq_scenario.py,sha256=VAlwXpAegpmvb3Zf-oMaBE7HGnfKVbhprCn2SE8ayKM,10355
343
344
  helm/benchmark/scenarios/best_chatgpt_prompts.yaml,sha256=KZdXj4KUbkwFzgIEXVakMpZLTqJ7rldxNuXVDIdlk-A,31304
344
345
  helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
345
346
  helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
346
- helm/benchmark/scenarios/bird_sql_scenario.py,sha256=n5elzanKEX9YclAl2l1y33aCjihTmaw1VF_ZsAU5IaM,3613
347
+ helm/benchmark/scenarios/bird_sql_scenario.py,sha256=KoCcy4enWJzrwK1X405EWnK8E0kjpmcwhVFfBrKSRRQ,4235
347
348
  helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
348
349
  helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
349
350
  helm/benchmark/scenarios/bluex_scenario.py,sha256=K4ob5_rd1hTOzlPJjuEvujcOdt_Ybgxj3jqj_BYjA9o,2599
@@ -364,14 +365,14 @@ helm/benchmark/scenarios/codeinsights_correct_code_scenario.py,sha256=7BpcezugYH
364
365
  helm/benchmark/scenarios/codeinsights_edge_case_scenario.py,sha256=csTwe-mv1f6Tyvnj9uZ0SYuj1GRVvgjzukV28gIhNpk,8703
365
366
  helm/benchmark/scenarios/codeinsights_student_coding_scenario.py,sha256=wc5Fefn4jpCw03dQ6WswCztJ8AO5j0Vrn6omcOVUq2k,7409
366
367
  helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py,sha256=qX3yckZdMojYhiwvokvEuQpRXOzmN2zmzKjQb96Ljg8,9651
367
- helm/benchmark/scenarios/commonsense_scenario.py,sha256=VN6nNZZpz9a1IC-tW5MvqztaW71f2zsV8lq-A34p3iE,10696
368
+ helm/benchmark/scenarios/commonsense_scenario.py,sha256=f1E94zQAArwha730LcdJFo75KrX50lqcaFRn9sq85Yw,10855
368
369
  helm/benchmark/scenarios/compositional_instructions.yaml,sha256=mPsFzPU6uaAD0xghzv-QD5Wk4uhoLY2sF3Fw_lunAsI,1822
369
370
  helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=sR3UzObloLUzgjNwTbSHLGGkeA0g9-Aq_utpBPT2u_4,4757
370
371
  helm/benchmark/scenarios/copyright_scenario.py,sha256=GWRCJdLlnWZcz6ztB5XIASGMPNd2o8EZNR2GueP8xuc,5035
371
372
  helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
372
373
  helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=FM6ty-JSFTDqdKLzfwgfhl3zV2oh_DWjRw4qI4-IrI0,11169
373
374
  helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
374
- helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=ZBfkUYlIa-BagRVBf97RoyLfEloAjnM0RPv5wmEWueQ,4406
375
+ helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=jnBIx4RPnTCGfWIcKNTTCyzBZWqUidWGTzTleOm958A,5020
375
376
  helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=pZK3dbKKNfNOHvNaGMkN9pjFznu4raNyLe4fWkxNHSo,8604
376
377
  helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=hBKRRYIHegOrhIo_i7-1RPtbxmuhXcg29DkUIep0x_o,6304
377
378
  helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=KzBz8nkrvPUTw5WmEoivtl0lLJ-mORek-IVKYmct2Pk,3460
@@ -393,16 +394,16 @@ helm/benchmark/scenarios/entity_matching_scenario.py,sha256=83F017FPFED_106IOawJ
393
394
  helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
394
395
  helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
395
396
  helm/benchmark/scenarios/exams_multilingual_scenario.py,sha256=c9zMGGL8EbCeNogTm-88g_5wWUiX1Zr7z_tsyjUq2h0,5404
396
- helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
397
- helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
397
+ helm/benchmark/scenarios/fin_qa_scenario.py,sha256=kwjdhO6_09csdZJ7KqMKXpnpOy6necDDZVOkiSW1lhY,6807
398
+ helm/benchmark/scenarios/financebench_scenario.py,sha256=_DompAP_3JzR6sGkvaBe_qubz7fS0BHB4wV0Gt8jGrQ,2900
398
399
  helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=I7eoymZfxu4gky3YjyLnZgaFIJcMkprxQxiCLM7wJV8,5455
399
400
  helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=Qw8OJzvp12716GRW5kIxxX--f92OFRcaP0oEy-gakjM,5674
400
401
  helm/benchmark/scenarios/gpqa_scenario.py,sha256=MsMsBqgxz6jKt2-ys98XAslGWkxZgzpYOws0b9e4Uj8,3520
401
402
  helm/benchmark/scenarios/grammar.py,sha256=58tQYKPj013V9jIpW7fXUqZBLuboqEi_WLlDjx74spM,5590
402
403
  helm/benchmark/scenarios/grammar_scenario.py,sha256=c3ATPkHM0WkA9QZEf2VNfThhuEUXD448uOuW6CAeVFw,2309
403
- helm/benchmark/scenarios/gsm_scenario.py,sha256=S_rD8uZsajgqyaJGNMpqYvshYYIW9hMV9N2udbI1Ax8,3405
404
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=8_ShEuOoEGu7iRE2b0tgi-cfBrCPF9k1L-Pgb__n3Bg,2005
405
- helm/benchmark/scenarios/harm_bench_scenario.py,sha256=CBo_AfbtHTlvJdsiquP0EDTKApVmDZc7EW0VTENNAfQ,2478
404
+ helm/benchmark/scenarios/gsm_scenario.py,sha256=dwIHWplfz0wCxD8BasRaIoCmG9cfMt3fRF3KhfhjyH0,3579
405
+ helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=hBh0ci-WXlAbhiPpsA96XEIkJPC9w_A2DAGRAHrIf9Q,2511
406
+ helm/benchmark/scenarios/harm_bench_scenario.py,sha256=ZFuVusNrbYxLwi57Dx2ACgLY3ydZySFB6PDwh8XP3XU,2949
406
407
  helm/benchmark/scenarios/headqa_scenario.py,sha256=0hJewHkF9IKQfW6NUJ0DPjlwQmr7N90a2eSXrBQiFNA,6635
407
408
  helm/benchmark/scenarios/healthqa_br_scenario.py,sha256=YneXTfp8V6k8rYCF3BTX6bxN2ASxdG3qrBr7uH_IFWc,3406
408
409
  helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=5R9En7lTNirZCVsMNqNB2metw0dIEPa9usoFB9W11B4,1855
@@ -412,9 +413,9 @@ helm/benchmark/scenarios/ifeval_scenario.py,sha256=v2Q1uYCd5i1jO4_gcIlTrbZdPZ27t
412
413
  helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
413
414
  helm/benchmark/scenarios/imdb_scenario.py,sha256=H9iHmKK-q4a5edSMcS166f1fjkNbOS5BEIgR3md3k7M,6887
414
415
  helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
415
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=JRTLaQc3PDpYeX9ewGnBteT9jXeaGbmJ1VzYGT8TsXI,3067
416
+ helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=L_ii0n5vWoLGkwrBcqaAyaaigX-7y6Quu6piXflX8EI,3979
416
417
  helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py,sha256=5fJHFonb7Ko7exHFtoUtvHar_7PhK2HjW9uDlU8Ljj0,2872
417
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=6z3VlcucrwK2B30artWiSpo-mOTr9tiwYV6Fu8XD0VY,2657
418
+ helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=qs3ID1f9Nobba2Mv3DxXzVVJmSU6RxtZW-DMJEAbkRc,3427
418
419
  helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
419
420
  helm/benchmark/scenarios/koala_scenario.py,sha256=h-dTHQrNVoi7p7sTXZDqWcpjlznfUgxNrgr4nW8Hrk0,2212
420
421
  helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=DE8efUmcPW5R62tZ46Rdsjv-EQs4lXm403O5XxM9heQ,7303
@@ -422,7 +423,7 @@ helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=JTm4Zkw
422
423
  helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
423
424
  helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=0DraJdQebbl8tv7S3WmLos98wnQFGJOzY6suGPoxR40,10954
424
425
  helm/benchmark/scenarios/legal_support_scenario.py,sha256=cM98PnIAfjQzciUYGtgHqHYnWIdbdJfh3uy6uEIo488,4567
425
- helm/benchmark/scenarios/legalbench_scenario.py,sha256=K_KjDH3Rk1AM6pXLRedo-6o2rEw9OIk3porlCr3IGvQ,5623
426
+ helm/benchmark/scenarios/legalbench_scenario.py,sha256=TaFwrzJzhPrnHrOV3GF7PYETRR5ywmMmn7oOJtZokZU,5604
426
427
  helm/benchmark/scenarios/lex_glue_scenario.py,sha256=H7f3F7gK7bgf6FXvqXGTQrecTE6RtZaitIKmwQLksck,10736
427
428
  helm/benchmark/scenarios/lextreme_scenario.py,sha256=dR5UUIymth3J3RInoNybygZg0rNZ-8wwzVHneuTTOGE,20843
428
429
  helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
@@ -430,13 +431,13 @@ helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNyS
430
431
  helm/benchmark/scenarios/lmkt_scenarios.py,sha256=K51CdOZqMOMOozUmADjrJuNCpUtXVEZwcOeIY-EZrwM,11162
431
432
  helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=ZtheFEcsBMSqGIPw5UPOO_b3v93mPFar1yqxVnsLq4E,6785
432
433
  helm/benchmark/scenarios/madinah_qa_scenario.py,sha256=W7YEQTHyNWUJD8sKFmXU9e-ubzvleWQs7Cj_1zdq2bk,2482
433
- helm/benchmark/scenarios/math_scenario.py,sha256=p9tsdNsiYFtuG89cMByZYn60QjWzEsnCO21OHPr4DJo,16034
434
+ helm/benchmark/scenarios/math_scenario.py,sha256=4BBhEvgfqPDrXxxW-4x4I0v3lWjscoLCf9vCURXs7hA,16043
434
435
  helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py,sha256=Gtc9DgV2bLPIDngROmizTWQHbTftnwVodi9CYT0_P2A,2146
435
436
  helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
436
437
  helm/benchmark/scenarios/med_dialog_scenario.py,sha256=MKDlZLJEUq1nDRzlkHlpTWOxHwgghWMXcQvHJcM2LP0,8615
437
438
  helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=tvF6d6e4WQi_mUIlZoLQvbOpVIfHR4nyMVVR8z4AkAE,5752
438
439
  helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
439
- helm/benchmark/scenarios/med_qa_scenario.py,sha256=uW8FOEQhMw6k0WF_LKlH0oFTQVS9D_9MHXvVTNwDC7k,5140
440
+ helm/benchmark/scenarios/med_qa_scenario.py,sha256=w7xTavAi7v-xmQXpxXM3Z071qR-eVbj_0yxwILAcLHk,5294
440
441
  helm/benchmark/scenarios/medalign_scenario.py,sha256=5ALak5Hq2XQbqwTF3fQYKg-QPtL_vjY7J1UsMm9SOFk,4481
441
442
  helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=fKXJFVLGnLcZKRBLsbjJA6YA4WqMaQAjkEU-i6YzSTQ,11626
442
443
  helm/benchmark/scenarios/medbullets_scenario.py,sha256=oMqnF3Ri9dghEWpGQYzfcTnYGMK5b2cJNVpJoqdtdUo,7694
@@ -444,7 +445,7 @@ helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=EDeeBKmbosUaMo3dg2MNVs
444
445
  helm/benchmark/scenarios/medec_scenario.py,sha256=sLx6tcFXcvhDIThGNVi-425znECAn5pkUgRk83CM-Q8,6343
445
446
  helm/benchmark/scenarios/medhallu_scenario.py,sha256=0EgeIxGuYMyBzM8xIOF4WcxfCOVqCp-oOuZe4Ai-CRM,3660
446
447
  helm/benchmark/scenarios/medhelm_configurable_scenario.py,sha256=vxvvAaIFW4cWaMez1xbEOZBh6S2wEH6Ws8KcGpnaZbs,3852
447
- helm/benchmark/scenarios/medi_qa_scenario.py,sha256=KXHQIliik9Cihaw2_M6GW5QdmHBeGoPc-0tnTw-_M5w,5224
448
+ helm/benchmark/scenarios/medi_qa_scenario.py,sha256=iv4_GUZJ9mGS7JGOMaPL747ujjrvnmeFjg1LbCpeMLo,5210
448
449
  helm/benchmark/scenarios/medication_qa_scenario.py,sha256=uyYxtCm_dX9Jt6X-3ha2gAUyxF55wKn3_k95g7VAzHQ,3636
449
450
  helm/benchmark/scenarios/melt_ir_scenario.py,sha256=d88DEGKVJZCeGnbrXrQZO_W4VJeqW8XNaYc8wIUiJtA,5978
450
451
  helm/benchmark/scenarios/melt_knowledge_scenario.py,sha256=FDG4OGYEV6Ac40VC7KAeikzbFKAK2XXFhH1-QUTw8jo,7923
@@ -459,19 +460,19 @@ helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=pG_NK1Et0QZosQAOLAxbciyNSq
459
460
  helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=KRl1lYX-ITWTGxWS_NNQ0o3I4E__jlzNDhAYvI1by7g,3749
460
461
  helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
461
462
  helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=2FVL-6Umn0BufFpJ0e405q1ZgeeP8Np1kCvsE61GaOE,4686
462
- helm/benchmark/scenarios/mmlu_scenario.py,sha256=P68i3gBlvVwjItZhLimtM6-zVGv3cYitSPH8ARwnkEk,4610
463
+ helm/benchmark/scenarios/mmlu_scenario.py,sha256=uHJny3NXaqqUfBav30T7ip0FJJ1hxqcUk8spEpUq818,4772
463
464
  helm/benchmark/scenarios/mmmlu_scenario.py,sha256=CyOISLOsXF9IEYGfeqWyYYkWGvrUvGivlWSJ5ttN9qY,2762
464
465
  helm/benchmark/scenarios/msmarco_scenario.py,sha256=p9YNL5oTa9isCGVvmqHHVofKmiwitjPQd28ElXmRAN4,35601
465
466
  helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=gtVSZxrs321tOolyD0gOoLzc0--uTc--3_HdlBVIuHo,6607
466
467
  helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=FIdI509nn0LN9opC4yJ8UsvWmh6-KECUMZF88duIEq0,6395
467
468
  helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=o7RydazvQkYK90epvuXsdEyE02fmpsDEwS6253fNptk,14365
468
- helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=XBGq3_gz1vaMhVX17RWF7mhXaSlKsv-_-JWCyHDkGWA,6428
469
+ helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=I4hjn0czmygPLB3tE-VTgCHWC28PaB6BdxL6eSBLL_I,6431
469
470
  helm/benchmark/scenarios/natural_qa_scenario.py,sha256=3wkXvYm7m0Isxv2EW6SIuIEwZEV2lihsSLQZaANsKZo,14017
470
471
  helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
471
472
  helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
472
473
  helm/benchmark/scenarios/omni_math_scenario.py,sha256=nB2miRRQ-cWwhpqUkypOZibYugD56wZ299nxE5bty9Q,2582
473
474
  helm/benchmark/scenarios/open_assistant_scenario.py,sha256=Z9eyaaHGRtFZTogIkOe1Pr6d70lqSe80tMsNPWR_jog,6577
474
- helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=XbO8Wpjjq2e8OsC2s_ZScV4TcZg3hlpVGy56hgxXY9w,3253
475
+ helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=MTzTZVGN-5c6wASKIK5kBFiGywjvAzRR2rjSbgiELhw,4064
475
476
  helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
476
477
  helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=Z8gR19kiTIugBTvBj6g9LiBXicfAxZ1AFh_GF_axgQc,9043
477
478
  helm/benchmark/scenarios/quac_scenario.py,sha256=y5bm1LXHIICqPIkWOg3sibnH_sC15b2zYUfT-_Y0V4E,7349
@@ -479,7 +480,7 @@ helm/benchmark/scenarios/race_based_med_scenario.py,sha256=pyeOUjWlQ30WgNr48BuV7
479
480
  helm/benchmark/scenarios/raft_scenario.py,sha256=BQ-faIiWBuUYmHTMCRbI8XpymtWvKK8DN6oNejjNi7M,5443
480
481
  helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=USsjBVzoL-Bgq8B2clQvl3d-g4XlOlt8gvBje9VD7Dk,3077
481
482
  helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=jgVf1D4eTSxwxQsW0GBou5hfSo2dnlEJvHpVJqk3BxM,6327
482
- helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=Dy0INRMzxSiIs9Pm3fa0hYodN-W--WPSv4kcmeQhucM,3270
483
+ helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=sUJs9eocWUrnBBOEFK4pUq4KgozL-QTra67zkcTHn1s,5048
483
484
  helm/benchmark/scenarios/scenario.py,sha256=6zYT0heGPh1HXmv9l2g360Y3CwcV4xjA6jUq5snNF5I,9482
484
485
  helm/benchmark/scenarios/seahelm_scenario.py,sha256=Pgw05ZT9NByV7GL0031vGImbhGOZPrHv8aOR5DmP7sA,94098
485
486
  helm/benchmark/scenarios/self_instruct_scenario.py,sha256=uPVclF96zh0P2VJ163nLa7XuTKlMKGaTDFN-6IcdbXQ,3164
@@ -493,10 +494,10 @@ helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=bM_qSCv5Qp_03TiDezgl1gUSSs
493
494
  helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=BttMbH39uai4qg621W0ySAFX-UtoRLuyEi-f4bfSrFo,4461
494
495
  helm/benchmark/scenarios/shc_sei_scenario.py,sha256=pTcb7n97VkesyRuqUqe5JGed1jDsQEd19udciDras8E,4532
495
496
  helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=IPOuJ74AIWOLDVIQ5lNUjMswcU9zeB_gOXg-K9HLTO4,3703
496
- helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=sjIHT5NZlHv_IcXr_15-pOiBUPKKwykyH-QpMfvrHAY,1247
497
+ helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=IjBVVLUG4muHvU_wd-12ML-YZqN4Qe39TLwb7hiYT4c,1743
497
498
  helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
498
499
  helm/benchmark/scenarios/situation_prompts.yaml,sha256=nJA3X_I67PIpXgd7LTekWwEr5zn1ryqIHgvqCpAwoGQ,1790
499
- helm/benchmark/scenarios/spider_scenario.py,sha256=mhiV3XWGwpnIQkaHFM_rvZlrwE7nqS12-F9t1eB8kdI,3306
500
+ helm/benchmark/scenarios/spider_scenario.py,sha256=qN1TTLfJmsOdRwf6a-sL9cMzXmJsu09nQPvSqn9L0hk,3932
500
501
  helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=ZiXGXeKelEm9NrFsHQS5ft1L4oL6a_IlAJm_flRv-Z4,5228
501
502
  helm/benchmark/scenarios/summarization_scenario.py,sha256=wry6hAO_YXk56gS79jJ6HP6VhrRjpExvEZSsl2vM910,8883
502
503
  helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
@@ -539,7 +540,7 @@ helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsH
539
540
  helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-FVN7Z4nGH8ZLiFfh_F9OPXZjw,789
540
541
  helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
541
542
  helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
542
- helm/benchmark/scenarios/thai_exam_scenario.py,sha256=YjFsom1yiu-xBZ3SGenNuczVCwQcmyoITTMavGv-QEk,6069
543
+ helm/benchmark/scenarios/thai_exam_scenario.py,sha256=7FALls8tnT5QxC1TT8A0Mce9kmRT-icsQ7SPU4oqWPs,10461
543
544
  helm/benchmark/scenarios/the_pile_scenario.py,sha256=Dz51JxxazqPiX_fk6viOav8hQ2n6Iw0LIPhouquu6aw,5632
544
545
  helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=0U7q8E9XB0H9oSN3OzhfsiZ-8PJrYXCCC04dffjicB8,6822
545
546
  helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
@@ -551,7 +552,7 @@ helm/benchmark/scenarios/wikifact_scenario.py,sha256=AHHZz_trtGf8HRoCnE6vukqrTD_
551
552
  helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
552
553
  helm/benchmark/scenarios/wildbench_scenario.py,sha256=dWJSqF06ZWAyZhaejNmrZ0Uu4Vlh5HMdTaMLNkMfe8Q,3668
553
554
  helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
554
- helm/benchmark/scenarios/wmt_14_scenario.py,sha256=TNIYBXnbuvaOcpfmKqRZF6-yta1pTZSLA4Fd_XHhjCY,5159
555
+ helm/benchmark/scenarios/wmt_14_scenario.py,sha256=7V2AFfd_LlbYTyi-tLXi5YvE8b1zrTxQJ6Z6e1xONso,5401
555
556
  helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
556
557
  helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
557
558
  helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=NtTEHzmbeCicbjTRxPBUueZrBGOPwF6RVc2Yftc-VKs,5634
@@ -575,11 +576,11 @@ helm/benchmark/scenarios/audio_language/mustard_scenario.py,sha256=7YHgfSpua5OdE
575
576
  helm/benchmark/scenarios/audio_language/mutox_scenario.py,sha256=bDCQbhsRDR6iQGNlCu_35kjmjGjuzjOIoraSncfOlOY,10277
576
577
  helm/benchmark/scenarios/audio_language/parade_scenario.py,sha256=UuOa5cSrHh5n3VF_SuJp4cy1MxlI3uEKHLrNEhGuyuw,4186
577
578
  helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py,sha256=oN4vBkElVzjccaEK2JFqoXMCGFTTHD0gcYwSDhvHTpQ,5438
578
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py,sha256=2qzPYfn0YYzzOtffD50kQu_ePpFJj_sSW7Bq8ZS6M2g,3559
579
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py,sha256=TEyfAsas3ihN4b4bpGkbK_M_uDt39fVrL5k8vl2Cdyw,3389
580
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=qPOP6eIEwxPKu6q5EzcrRmhMxMUQk5F9iq8zdJ1Ccrc,4819
581
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256=CGteDFCd31vbu_eg5oal1cnfjQ2J0Ty3C2HYyBLhI5M,4186
582
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=sL93Q2ERzYiWcTOFEyvjUNbX0BgPdsyHKt6eTr51-Kc,5177
579
+ helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py,sha256=Xw256FUD3mNZRtrnR1N9q5oSbHwGCP9KzLlcNjb5vn4,2740
580
+ helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py,sha256=MrjlgmeVFhdmvVZclFmOGK0wZDQBFK5E2wBG8bVyj-c,2565
581
+ helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=xBrqLDVU-94NNRsByLa8BovFc7fblWa3BO3eftcw-TU,3603
582
+ helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256=-3ZKJVoNRLpnooI9Nl_cMH250r7Pg0hxtgcjYN4LbSE,3993
583
+ helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=2w1RuNMO2I9zhphO0LI5wgYVH9K7gbos_DeBilucakM,3960
583
584
  helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py,sha256=wkKyTCtx4isQSMufap_6DsNdGkHi7L8FQ2p7n58kKYI,3124
584
585
  helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py,sha256=4M_gTWs4CoJ1Ce9dDFBTAe9dzSovpsve_sN1eco2V2A,3155
585
586
  helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py,sha256=L04ee5bM5E0UNNmkwEzVwug4HJXQoIcVjujPgxtU2h0,4366
@@ -666,7 +667,7 @@ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0
666
667
  helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
667
668
  helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
668
669
  helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
669
- helm/benchmark/static/schema_long_context.yaml,sha256=p01u7yPN75ZNmJhQodCRJo4q4Zb4vBieHKYqp4fD9Jg,11520
670
+ helm/benchmark/static/schema_long_context.yaml,sha256=NH7poEOCpmbqOZvbHeWrnSB7MWavh7EX2NU-Yl-nXNg,10829
670
671
  helm/benchmark/static/schema_medhelm.yaml,sha256=e3vVHdEXcS-joOUMUoIoFA3x9hEE__svDoajbjfqpLE,51793
671
672
  helm/benchmark/static/schema_melt.yaml,sha256=mmPqwDa26DVZXsRJkmKQSyD0OStvjlxaMoSPM25SpD4,47494
672
673
  helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
@@ -685,25 +686,26 @@ helm/benchmark/static/schema_vhelm.yaml,sha256=0slYep2eepUefgtK_m4iSS785sHdJzljm
685
686
  helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
686
687
  helm/benchmark/static/schema_video.yaml,sha256=FkpI5Slc4w-ty4hns82ArXIvTdqppWDnkJSpIp74QN4,9713
687
688
  helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
688
- helm/benchmark/static_build/index.html,sha256=MRRycZym58h-5KW7aKyiqGxIpRB8DV5OHkND5JL5aDk,1178
689
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
690
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
691
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
692
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
693
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
694
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
695
- helm/benchmark/static_build/assets/index-671a5e06.js,sha256=XEa85-IyP6ZeHfsWGoPno-Qj9pSxlnHsjLYmaqzdzqg,124954
696
- helm/benchmark/static_build/assets/index-9352595e.css,sha256=k1JZXkXPFsUerOZ37oDhxjcb1ypOFEdDogJUP6H-NAQ,491553
697
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
698
- helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
699
- helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
700
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
701
- helm/benchmark/static_build/assets/react-f82877fd.js,sha256=ijg4n6eANaZKXPWIVTQITqrtf-zzicjslJMm6DniDkA,275149
702
- helm/benchmark/static_build/assets/recharts-4037aff0.js,sha256=SP08CFvsw8cMMMMdqcXvsLviuOxkAhXGwvUIMvYUdxk,432466
703
- helm/benchmark/static_build/assets/tremor-38a10867.js,sha256=prOrg5S4EeKHSd6RkgnBIbVfXIUq3xjeVE0MRdqvenI,293019
704
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
705
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
706
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
689
+ helm/benchmark/static_build/index.html,sha256=BaMObuai-TufVapXx7P4wX8ZGvoQuyQh4bdD2ZDukoE,1185
690
+ helm/benchmark/static_build/assets/air-overview-DpBbyagA.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
691
+ helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png,sha256=_I8OI_2Fy_Vkmxl74qbSKtOb-C5mbHMye4JaC6LylDk,377331
692
+ helm/benchmark/static_build/assets/crfm-logo-Du4T1uWZ.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
693
+ helm/benchmark/static_build/assets/heim-logo-BJtQlEbV.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
694
+ helm/benchmark/static_build/assets/helm-logo-simple-DzOhNN41.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
695
+ helm/benchmark/static_build/assets/helm-safety-COfndXuS.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
696
+ helm/benchmark/static_build/assets/helmhero-D9TvmJsp.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
697
+ helm/benchmark/static_build/assets/index-oIeiQW2g.css,sha256=k1JZXkXPFsUerOZ37oDhxjcb1ypOFEdDogJUP6H-NAQ,491553
698
+ helm/benchmark/static_build/assets/index-qOFpOyHb.js,sha256=AwlcCxBOsMoY19-58uUNxyZC9llXwzDheC5ARsE98kM,129215
699
+ helm/benchmark/static_build/assets/medhelm-overview-CND0EIsy.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
700
+ helm/benchmark/static_build/assets/medhelm-v1-overview-Cu2tphBB.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
701
+ helm/benchmark/static_build/assets/overview-BwypNWnk.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
702
+ helm/benchmark/static_build/assets/process-flow-DWDJC733.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
703
+ helm/benchmark/static_build/assets/react-BteFIppM.js,sha256=rtvePuxI4R_ecUu6MekBI3bolSJCKhriCQYdFqg6HuI,275079
704
+ helm/benchmark/static_build/assets/recharts-DxuQtTOs.js,sha256=h1N20jF_qA400VP6AQSdA1GhlNsFEuXqQk5hMpInUjg,430871
705
+ helm/benchmark/static_build/assets/tremor-DR4fE7ko.js,sha256=xL2aEMpCmZYl2FDAA6G2MOjOIjfwp4v40hxilO10j1I,288436
706
+ helm/benchmark/static_build/assets/vhelm-aspects-NiDQofvP.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
707
+ helm/benchmark/static_build/assets/vhelm-framework-NxJE4fdA.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
708
+ helm/benchmark/static_build/assets/vhelm-model-ypCL5Yvq.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
707
709
  helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
708
710
  helm/benchmark/window_services/default_window_service.py,sha256=HlLI3be8s-GNxDygNGrvo9exEhbrO8Vtr3w0rnSIx7M,181
709
711
  helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=wfdydJY6AmpYCfAv5PQu9D6nFXbuxIRum7Tsv0DemJE,2148
@@ -736,44 +738,44 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
736
738
  helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
737
739
  helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
738
740
  helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
739
- helm/clients/ai21_client.py,sha256=RAXQufajYnxr3b_1Hl-wAZkeE_j6O8zX-vngWEits6c,8158
741
+ helm/clients/ai21_client.py,sha256=wDhdlPvmRDqY4v5bLzL1TDu-HwZ9vuqEy2FbmH-jg7A,8239
740
742
  helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
741
- helm/clients/aleph_alpha_client.py,sha256=BK2eQIHYMxLMsZNWld85ZCj17JAoy5lU7rHuSBa4fOM,4981
742
- helm/clients/anthropic_client.py,sha256=R85gLYrheN2YWSGTnf3pkYTjCkTl300ktdlGLe1_1-o,36181
743
+ helm/clients/aleph_alpha_client.py,sha256=yqVBGDg5N_py0CB02GezK3lwJ2j2bFLm5qATO_d7R5M,5062
744
+ helm/clients/anthropic_client.py,sha256=XEHfZL8jc8ii8RC4ZTnxUdLi6c5gk9TYHMLOS4laJDI,36492
743
745
  helm/clients/auto_client.py,sha256=J5bCxIDZJUdV1dCv_EtbvwPzd1p2Ogtg207vpb3PhgI,11624
744
746
  helm/clients/azure_openai_client.py,sha256=mZ0udOAjadp7ZyE2KEtq8XuQp45eHlX_qM_getyzbA0,2009
745
- helm/clients/bedrock_client.py,sha256=sXxzNTs3pwVIwvir5lyJWLRajI9p2lMiJq21XsZ_FZo,12267
747
+ helm/clients/bedrock_client.py,sha256=CswF6TOq4M3We-ukDY4kjTUBiPUkdyczinZEYeZAdVI,12352
746
748
  helm/clients/bedrock_utils.py,sha256=8ZZfyOuZkgxL_naJ-wwBnH4GKv425fu3MfyakGHxeb4,3764
747
749
  helm/clients/client.py,sha256=fWJ_Eg4NyhPqlvpDvM7AjWN7cr2LU2uWdsnENLJXlTs,8963
748
750
  helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
749
- helm/clients/cohere_client.py,sha256=edQO5raoJYmYzfVREqHhNvjTcqPevG0M8EPMLOANqXY,10975
751
+ helm/clients/cohere_client.py,sha256=HQ8MeQKZVa-A3zZhpGK3sGzxJ8uE7EsJWCINzZus-kI,11082
750
752
  helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
751
753
  helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
752
- helm/clients/google_client.py,sha256=mIaUzK7GHCa9pqK1BEVhdt6dZsJfHv1Qdsf3I0Ayq8A,2912
754
+ helm/clients/google_client.py,sha256=8O-98kt7SLSZ-_bkPMTBdQPsxTb_UgfFrKV8tXidUuU,2993
753
755
  helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
754
756
  helm/clients/grok_client.py,sha256=SbVB6AduTwfElzUgEMnQW2kQUFVTCv4TpPPJvElQEe0,1127
755
- helm/clients/http_model_client.py,sha256=_F3_y2UWqbzESQdzV0FMEsECIKjporVSAW6iUQhJ35c,2818
756
- helm/clients/huggingface_client.py,sha256=oWR4yNFk28nrnB3IoznrhcEuU0pZkNywP0E82z1-NGM,17671
757
+ helm/clients/http_model_client.py,sha256=rDCzfugWoZegOSt-ZimKePYs4YA3SZ9CJ5Xuycjuccc,2899
758
+ helm/clients/huggingface_client.py,sha256=a4QV2h-voZ5MC0agrM4AibLdVUbyNWp1Pk4XUFMVc04,17709
757
759
  helm/clients/huggingface_pipeline_client.py,sha256=ivFTMNHBwwIUjkeOHkl-veZi5nNAjtnkYvneRFWs-6Q,6154
758
- helm/clients/ibm_client.py,sha256=4W4fbjnDNjXrP4gVwSfBHPus0QcqFOQzFvfaST1BE1Y,9701
760
+ helm/clients/ibm_client.py,sha256=9pQh0Ho5DN27bHYt6NrUHVdgD-iQyP8m_-OlraYWxok,9765
759
761
  helm/clients/lit_gpt_client.py,sha256=pgLfSvusNpdj8F5DVxzQdHxTDRNX4RVt6unegao803U,6229
760
762
  helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
761
- helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
762
- helm/clients/mistral_client.py,sha256=ceM8KLAcniAqK1BNVdUGzqy4av2SEEau6PVmPivxc0o,8369
763
- helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
763
+ helm/clients/megatron_client.py,sha256=Zk80yeDFWSFDy2ILtuOC5hs9ruH-AUDhxZiMWw_IJi8,4188
764
+ helm/clients/mistral_client.py,sha256=Nlh97asTsMSHo7-m1JgbYdqnUjSeQ83spaNeHVNkvzg,8454
765
+ helm/clients/moderation_api_client.py,sha256=1lB875B5F2I32u8j-Q8DoA9CQA5-kMJ6RnPfkOqS6AQ,4800
764
766
  helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
765
767
  helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
766
- helm/clients/openai_client.py,sha256=4Q4LVMqvPo-37MV_BhsMydpwmMLfo-2kftRZH9lGtZs,28538
768
+ helm/clients/openai_client.py,sha256=f_RY84FPNt04vfR7py4iXDr9i6cB7824v9PYwTna-Q0,28650
767
769
  helm/clients/openai_responses_client.py,sha256=FhQcOcXNZc5AuDMh1KBD3ZoRdEREy73dIeFBjUg9YDo,8444
768
770
  helm/clients/openrouter_client.py,sha256=oK8gXBhBs1y0AriZ9tVp8kx5lSY7gUgQJv-mfywSTfI,980
769
- helm/clients/palmyra_client.py,sha256=4AaZcV2tPHU4HJ9FWSkOY8_C9ndEckH3PH715QxJQ8E,7086
771
+ helm/clients/palmyra_client.py,sha256=_dUeVY-64C94aJdbgzpGWy6b2AbmIxCG4ZqTSgRRLcg,7128
770
772
  helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
771
- helm/clients/reka_client.py,sha256=hA0tq3Hc9669q2sYa4Jr5yWy2NAbvoFDnVqQ6vds62w,8334
773
+ helm/clients/reka_client.py,sha256=6FNiH7b8ADO8NHS7759rDeIGGbgVFagpDZ7_u_rYgaA,8376
772
774
  helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
773
- helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=NGbeI6sMenmgqPQTWxYF3C1Aen29LybRcHcsmS3Jqmg,2059
775
+ helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=EhgDbDoDNwTow65jea4dJNqnBn5CjYUl_N1MueeB33g,2057
774
776
  helm/clients/stanfordhealthcare_claude_client.py,sha256=ShhbLttPDRa-Pnvr35_2WmVx5s0XpsJMGzu5qhzLoLI,1020
775
777
  helm/clients/stanfordhealthcare_google_client.py,sha256=cJK_uH-YBQpBJsltNuiUi0x77bh0eCM5UNBaJQ1zai4,1475
776
- helm/clients/stanfordhealthcare_http_model_client.py,sha256=LEq1fIxHnTnwCsvkF1AUlw6L4Gwv4egx0-PUvNiMNTo,3047
778
+ helm/clients/stanfordhealthcare_http_model_client.py,sha256=2ppahR35twHqxDMb7Dzy2rfjoFVuHZTB05MZJeKOy_8,3128
777
779
  helm/clients/stanfordhealthcare_openai_client.py,sha256=Qyl8voGz1hJPqT6g4PunMuN99EYaW8U-NXQQSgJbiiM,2169
778
780
  helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy6ifJ8pUiNa3vBcWiDsIwXFI,1343
779
781
  helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
@@ -782,20 +784,20 @@ helm/clients/test_huggingface_client.py,sha256=8Shzrf1Pad1UsiUAdeOSqsTPQaay0CrWX
782
784
  helm/clients/test_openrouter_client.py,sha256=gCzchJMQZi4kkgtpGe1Ma0xF2nsP1uDevJcqbprZ6RE,2414
783
785
  helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
784
786
  helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
785
- helm/clients/together_client.py,sha256=kEa6z54zPWlcLHCb2g2PCxLRpdJ8aE9zvG5Yzkaeun4,25518
787
+ helm/clients/together_client.py,sha256=Nj1FY1nMN5pYiHOG6lKPinVwgqBCTBT-9kHdgq953KU,25667
786
788
  helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
787
789
  helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
788
- helm/clients/vertexai_client.py,sha256=Qm-EkbpXnwiwZzB592-FPBuSlxKIkVH7tWBFFvOBvCY,23631
790
+ helm/clients/vertexai_client.py,sha256=Hf8ncfCrpoG8ZLl_TRTX5vdxjf0kyzwIaFR8V-qfSEc,23768
789
791
  helm/clients/vllm_client.py,sha256=xmXf35WX2oOZhpQnRxeooXGshENySOHZCUQ1E4pbQbA,2647
790
792
  helm/clients/vllm_granite_thinking_client.py,sha256=fds2i8LUG78OJYke1uYdDy6XRFqE3rZgSornFjzu4Sk,2172
791
- helm/clients/writer_client.py,sha256=flKLeMbFkyGfNmv1ozZGU4dxNy-QF5bFJF0mGHqpU3c,4467
793
+ helm/clients/writer_client.py,sha256=Eyae245YkOcW1yHtVMqLPRRvzddr9IcXZLstr8UO5iw,4552
792
794
  helm/clients/yi_client.py,sha256=nC60d2HiUL2W59FTne9tWmZ9bGGY1OvI7Ob3Ng4wSPE,750
793
795
  helm/clients/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
794
- helm/clients/audio_language/diva_llama_client.py,sha256=Bvcf4wE7yMZlqETgKEMtCug8-2fQI8QCDdaGWSeQ2X8,4864
795
- helm/clients/audio_language/llama_omni_client.py,sha256=OCak716q97uEk9CBXQqnmUsbLFR-dddMzg5eyIZ4gzE,8718
796
- helm/clients/audio_language/qwen2_5_omni_client.py,sha256=ftAVtOG0azvRQEcFjkSSBMU6SDk9Bi8WIks6o6UCbKQ,9684
797
- helm/clients/audio_language/qwen2_audiolm_client.py,sha256=s9eH8fnVgw5xV39b_8AGt6IyNN3q9Uhcx6HZVxt7TM8,8981
798
- helm/clients/audio_language/qwen_audiolm_client.py,sha256=RvYweXANEyzhHYDx38H10F0ZEFaL8kj7n7TZ-UrRmZs,6338
796
+ helm/clients/audio_language/diva_llama_client.py,sha256=NoBeG74AqDLxJXZuyBtdEwXmAprj3cbTjon-_-_C4oY,4945
797
+ helm/clients/audio_language/llama_omni_client.py,sha256=JIOxGd1iCyCP8LPqA5KupwSsXKXb64GYmllZ5UtKgJc,8774
798
+ helm/clients/audio_language/qwen2_5_omni_client.py,sha256=3rWuCKfrvwpRzQoDxWJMhThS0cX1hCg9fCWiyVstXSk,9740
799
+ helm/clients/audio_language/qwen2_audiolm_client.py,sha256=AZKLQY6ipfmpPm2Kg-Ecmm0Nut_Ni6syx1P57XCmYLw,9037
800
+ helm/clients/audio_language/qwen_audiolm_client.py,sha256=j2OQaLnHU5v4IaVZrpzV7D5JFzXYXYHxcLvO1rnO23E,6394
799
801
  helm/clients/audio_language/test.py,sha256=FrKpirOwJW1__E2egq4VPgsTrgiSHZHBwfUCvxNjC0o,1969
800
802
  helm/clients/audio_language/llama_omni/arguments.py,sha256=MxzZKE8sNsOe5eUse96gejOsmu_MfTJGiuOwR87xiSA,2334
801
803
  helm/clients/audio_language/llama_omni/constants.py,sha256=IjFS9EUI5p1DLtGcX0B1lSxESkxcx5dMbuMkMm1UaSs,183
@@ -826,19 +828,19 @@ helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgj
826
828
  helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
827
829
  helm/clients/clip_scorers/multilingual_clip_scorer.py,sha256=LgV1hN6y2FiFQ30UakxRmlwtLs_LCMxrOCewriN1nkk,2066
828
830
  helm/clients/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
829
- helm/clients/image_generation/adobe_vision_client.py,sha256=eFEeuyyQlas9Oz-w7cdQ_mRIwbPJFECwmAu5xkvt3wA,2892
830
- helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=6HXLS_kxJouNyLVLfhoepMf5CNIQukWzxkVNV71PJ6Y,4028
831
- helm/clients/image_generation/cogview2_client.py,sha256=ssIrM6-7bbBQq_w6SCE4DtPWnF_S2x_FqMhSjomlBK4,8455
832
- helm/clients/image_generation/dalle2_client.py,sha256=amsgWBzP266UmOYM6QFIxEV4xAybw_OpPVdtH748REk,8430
831
+ helm/clients/image_generation/adobe_vision_client.py,sha256=x8nOWO4oQLy8vp_iiZN0dAKQz2QxEfcDKFgSETH9hHQ,2973
832
+ helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=91--D-nax3rzcfBYdinMxtH0xa0uwWZs_4jH_HgOet8,4109
833
+ helm/clients/image_generation/cogview2_client.py,sha256=hRNkJjw9DbqEioiA1PKtg5-GX5zqidSabw-M5lvr57U,8493
834
+ helm/clients/image_generation/dalle2_client.py,sha256=58JACUUO0d9EETqyM7k2eA-YsnmNFhhqtVrAlExrNq4,8515
833
835
  helm/clients/image_generation/dalle3_client.py,sha256=sabS7AJ6O5ewmTkGmHr4cK14tlMcmh-xrGgj7J-xa3k,4639
834
- helm/clients/image_generation/dalle_mini_client.py,sha256=wTXW79KJ5tCku46AiotA2x0vQAqKi6g4aX05n-Qm_pQ,8175
835
- helm/clients/image_generation/deep_floyd_client.py,sha256=P566sdeBvLLv6vi9Y4IbtEfmoQykMDh0viUeRXY7Ik0,3028
836
- helm/clients/image_generation/huggingface_diffusers_client.py,sha256=OgkWWNGquu0xTKVSAduAMz8T_pWG-SwwG9iZegPrXEw,12313
836
+ helm/clients/image_generation/dalle_mini_client.py,sha256=axO4mmBZQ22juEwqYFdiFBtH6cbqweXbwmLKy5d-03Y,8213
837
+ helm/clients/image_generation/deep_floyd_client.py,sha256=scEifSsu2fRD08rHzHhSBjHRbaYnKDSC_Z8I2VQXO3E,3109
838
+ helm/clients/image_generation/huggingface_diffusers_client.py,sha256=atj0YBQYHgrTzCkHFZVhNC2SXdgvWEc0Yg-62475xQo,12352
837
839
  helm/clients/image_generation/image_generation_client_utils.py,sha256=N130PbHLLvE9Q1iVefPvTCJzs3hG3osZCeYdJyjLjCw,437
838
- helm/clients/image_generation/lexica_client.py,sha256=zyyfxZdTiBopPLZJ-uu5ewQYf9tUhtrEIHZddq8gCBI,3681
839
- helm/clients/image_generation/mindalle_client.py,sha256=IH9XM6n-ZHXyidTG66ew-pAgb9XaXLu-_AD_-YBydU8,4653
840
+ helm/clients/image_generation/lexica_client.py,sha256=7uM9Zq5JXbsjriJyYnVA_S6_3xCKKyGw-lMZAKtfENo,3762
841
+ helm/clients/image_generation/mindalle_client.py,sha256=6YWzCjyV5ELRvmIiq-WjHO-rVdOulcC9PH7ughy-H8s,4692
840
842
  helm/clients/image_generation/nudity_check_client.py,sha256=TeFga6HvBKgdX7LitBoioXUD4BQGavVwzr5BFFE29x8,2599
841
- helm/clients/image_generation/together_image_generation_client.py,sha256=onvBeRFuuuzpAVg5lZAbnUzmv5L5HwsPwRcflDPqWUI,4393
843
+ helm/clients/image_generation/together_image_generation_client.py,sha256=MkVwuK9iTGjMjtnqv3RFJpIm9_RHXb6Ys4te_WRfkO0,4474
842
844
  helm/clients/image_generation/cogview2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
843
845
  helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrHPtGDiCRq05kEh-KHjSTgxPDw6R8,3766
844
846
  helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
@@ -878,14 +880,14 @@ helm/clients/image_generation/mindalle/utils/config.py,sha256=lh8dXvL7ctKmuYEbeT
878
880
  helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh9HMveJs6F49UMK57Xfa0ccnHqI8,5029
879
881
  helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
880
882
  helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
881
- helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=S4FDbSO917bUw3cK64xnxwH5HFH_Eb-w2zQ8ZL4eSSk,6588
882
- helm/clients/vision_language/huggingface_vlm_client.py,sha256=OHV41AA-WZo_CnsHymwslgjDcVK0uHmIrvGbrxBDK5w,5000
883
- helm/clients/vision_language/idefics_client.py,sha256=DURync-8rh2ccdlGDPl3NMgryBcMn5yCrrmFZisf5m0,7784
884
- helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
885
- helm/clients/vision_language/paligemma_client.py,sha256=K9MzXlgjXoiVafA8bbu-mKNt3Z9kq8v8AJL286DyQqI,6867
886
- helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
887
- helm/clients/vision_language/qwen2_vlm_client.py,sha256=jvh_-jyvFL4r3LPX-gWPCYHT503JtJ73FVHQS2KyQ2c,8325
888
- helm/clients/vision_language/qwen_vlm_client.py,sha256=wNxEuYOrhjaW5s4vtdRxKvJ-LCTTGyKqiqD84j7H1Do,7565
883
+ helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=dBf-tQJSwjHjZ3-eOaf2xfpltMRSWfczNLh7_OOPwVw,6640
884
+ helm/clients/vision_language/huggingface_vlm_client.py,sha256=3qQ4Ks2M-CWWn3h-Kl_xIO8Dyd_2Bc9wvvNkufyfMsA,5081
885
+ helm/clients/vision_language/idefics_client.py,sha256=7TNV7JTXeD6A5SZf9CtM7ugnd910B0-fFYhKxu2dA0Q,7836
886
+ helm/clients/vision_language/open_flamingo_client.py,sha256=w-bUzcSlwn_t_pX16HRFM9Vb3GhY3MhEPVShQKd0dKw,6558
887
+ helm/clients/vision_language/paligemma_client.py,sha256=9SW_QYJm8PDmQpT4iDkbIohbMv0f2-QacJpBRpDknN4,6919
888
+ helm/clients/vision_language/palmyra_vision_client.py,sha256=oUKfD_gJnWYs0l7u6axIt5xIoHwRkEMRK4agq50_JGc,4156
889
+ helm/clients/vision_language/qwen2_vlm_client.py,sha256=t_u36ZjZlORnCxaWsx8q5T2eAzKBW2VTRZAIYTHOf6s,8381
890
+ helm/clients/vision_language/qwen_vlm_client.py,sha256=VVhPVI0Xc2BuHQdWWc89jJTVXyscn62DxyzHpKmbmvs,7621
889
891
  helm/clients/vision_language/open_flamingo/__init__.py,sha256=RTxnxjYnTmTZv-608o66_W74qmKLpEO6hx0cxaZaYv8,172
890
892
  helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
891
893
  helm/clients/vision_language/open_flamingo/src/factory.py,sha256=4KRXLV5mOEZ34-Foq2zVgTye3sQD-Buz6NZTSp2X9_A,5790
@@ -907,7 +909,7 @@ helm/common/critique_request.py,sha256=DZhJ_sY2IMluOxz-FeHvuEkA2Ujsx65HXT__7T3Ux
907
909
  helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
908
910
  helm/common/general.py,sha256=TcdPXn_bgPFvXtFP2lJhncz4Q8SdTXnKOinHOTBsegw,12027
909
911
  helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
910
- helm/common/hierarchical_logger.py,sha256=qIbhwh-dlCcnYG10qTSMxIMM7_Q9VJj8ymDqnWlseuo,6151
912
+ helm/common/hierarchical_logger.py,sha256=iGVHqCSOlVijjPNvzQDHOdxP8-2ll2PGA2Y5n-u4_sQ,6827
911
913
  helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
912
914
  helm/common/images_utils.py,sha256=8BsN0fd8pc0rh_TSDvippWhTfwmJJXKNF2zqKLB8cps,3372
913
915
  helm/common/key_value_store.py,sha256=D9ZBORzZncf3zHQOP4AuNbQnV8cZpO_kqHY1mDRugqQ,3174
@@ -918,7 +920,7 @@ helm/common/mongo_key_value_store.py,sha256=G0TIWQcvwMjyXh4TnN6xJ462HKHUAZtQJJYQ
918
920
  helm/common/multimodal_request_utils.py,sha256=n6HgTyHNqfGmU9qmVK-wxQzrkPZ5Wdh-lO_y_ln6VYc,2184
919
921
  helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
920
922
  helm/common/object_spec.py,sha256=sKcEdggqRa3a8TovHAS4lf1LaahOFInvMl5DUF4tE6c,5186
921
- helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
923
+ helm/common/optional_dependencies.py,sha256=mM5qeuTq6-BiNJPjAsq29olq8_5TOVF-FIK0EeM25Po,618
922
924
  helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
923
925
  helm/common/reeval_parameters.py,sha256=exaEucXnSI8a076uq_qhO3CTBztMMRoRzL_7v1N4adE,300
924
926
  helm/common/remote_context.py,sha256=DzFMii9AN03CoWp1J3k703-7oQJYHwEf9TDV5YzM6v4,2825
@@ -926,7 +928,7 @@ helm/common/request.py,sha256=HWj6IizIwJm9_NigO-geira_rI6aqhj5CevQB694m94,9161
926
928
  helm/common/response_format.py,sha256=wIptA8FydZoRjMvO5SFIplgDXhwpZvZmFI-Bi-7mcGU,516
927
929
  helm/common/test_cache.py,sha256=j19p-qzv_98X_TMW4b39ZHwSJ-MX3p91PrkYumarS6Y,4870
928
930
  helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
929
- helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
931
+ helm/common/test_general.py,sha256=ZPuRRkMG0gA95GOVxfd4xvtSV-1T09rBj3Huwi72-Ks,1909
930
932
  helm/common/test_logging.py,sha256=tkb_QDPkKBfaEQ5Y8Xip9PgMYhqOFakcENqyzO5Mj2o,2681
931
933
  helm/common/test_media_object.py,sha256=SUWLfms_vkXNivRYM0ZT8AI3_2ru6GON5l-Hb-lk-t0,1661
932
934
  helm/common/tokenization_request.py,sha256=NND9ESiiDE0H8QRNpfHVjXS7MQfKKIwtVRKDIjPnnJM,3344
@@ -935,9 +937,9 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
935
937
  helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
936
938
  helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
937
939
  helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
938
- helm/config/model_deployments.yaml,sha256=JGM4eLHXv3KgndTu2ZqnMH5rwvoXNvKAoTAnmfZDs7A,174425
939
- helm/config/model_metadata.yaml,sha256=8W9u04RugI_L6Kj3ipGqQlWLeXAd_FQwcw-2usKm5y4,274605
940
- helm/config/tokenizer_configs.yaml,sha256=KZ6nReCV6AoActBoQYfi9BH4eGYkSx4OmSa2gzWh0uo,41039
940
+ helm/config/model_deployments.yaml,sha256=DNY-6M7CVA0SSvS4nMNpK7zLAPhHDDR4NHimRrFrQjA,182596
941
+ helm/config/model_metadata.yaml,sha256=2IGQlmx6GWHveVjfSsSAn8WMdDLji5OD7d7BK4cDG7o,291347
942
+ helm/config/tokenizer_configs.yaml,sha256=TUuOUkORUVnpiHsPFO9BPcKTM3WPCaFcN8IzIBFsvkg,43555
941
943
  helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
942
944
  helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
943
945
  helm/proxy/cli.py,sha256=kEDoHpisFO0EJ0Wfm1FLpJdP9sXk9j8WCILEq42RKb0,8317
@@ -998,8 +1000,8 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
998
1000
  helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
999
1001
  helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
1000
1002
  helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
1001
- crfm_helm-0.5.8.dist-info/METADATA,sha256=UCr1ojkpYEsbV8_KfuhviO1vhPRs0fnfz7ADVaqa32E,18414
1002
- crfm_helm-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1003
- crfm_helm-0.5.8.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
1004
- crfm_helm-0.5.8.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
1005
- crfm_helm-0.5.8.dist-info/RECORD,,
1003
+ crfm_helm-0.5.9.dist-info/METADATA,sha256=LuiU5r_-9KfusWA04IwPffmA5KfGYXwZvRR-noA20Ns,18617
1004
+ crfm_helm-0.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1005
+ crfm_helm-0.5.9.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
1006
+ crfm_helm-0.5.9.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
1007
+ crfm_helm-0.5.9.dist-info/RECORD,,