crfm-helm 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (243) hide show
  1. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/METADATA +5 -77
  2. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/RECORD +228 -197
  3. helm/benchmark/adaptation/adapter_spec.py +5 -0
  4. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
  6. helm/benchmark/annotation/aci_bench_annotator.py +11 -22
  7. helm/benchmark/annotation/alrage_annotator.py +90 -0
  8. helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
  9. helm/benchmark/annotation/dischargeme_annotator.py +11 -22
  10. helm/benchmark/annotation/med_dialog_annotator.py +11 -22
  11. helm/benchmark/annotation/medalign_annotator.py +11 -22
  12. helm/benchmark/annotation/medi_qa_annotator.py +11 -22
  13. helm/benchmark/annotation/medication_qa_annotator.py +11 -22
  14. helm/benchmark/annotation/mental_health_annotator.py +11 -22
  15. helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
  16. helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
  17. helm/benchmark/annotation/model_as_judge.py +23 -18
  18. helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
  19. helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
  20. helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
  21. helm/benchmark/metrics/air_bench_metrics.py +3157 -1
  22. helm/benchmark/metrics/alrage_metric.py +35 -0
  23. helm/benchmark/metrics/basic_metrics.py +267 -2
  24. helm/benchmark/metrics/classification_metrics.py +19 -1
  25. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
  26. helm/benchmark/metrics/dry_run_metrics.py +30 -1
  27. helm/benchmark/metrics/efficiency_metrics.py +74 -0
  28. helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
  29. helm/benchmark/metrics/evaluate_reference_metrics.py +299 -0
  30. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
  31. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
  32. helm/benchmark/metrics/ifeval_metrics.py +13 -1
  33. helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
  34. helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
  35. helm/benchmark/metrics/language_modeling_metrics.py +13 -1
  36. helm/benchmark/metrics/live_qa_metrics.py +13 -1
  37. helm/benchmark/metrics/llm_jury_metrics.py +13 -1
  38. helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
  39. helm/benchmark/metrics/medec_metrics.py +25 -2
  40. helm/benchmark/metrics/metric.py +25 -0
  41. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
  42. helm/benchmark/metrics/omni_math_metrics.py +13 -1
  43. helm/benchmark/metrics/seahelm_metrics.py +14 -1
  44. helm/benchmark/metrics/summac/model_summac.py +2 -2
  45. helm/benchmark/metrics/summarization_metrics.py +129 -1
  46. helm/benchmark/metrics/toxicity_metrics.py +31 -1
  47. helm/benchmark/metrics/wildbench_metrics.py +21 -1
  48. helm/benchmark/presentation/schema.py +5 -22
  49. helm/benchmark/presentation/summarize.py +180 -11
  50. helm/benchmark/presentation/taxonomy_info.py +20 -0
  51. helm/benchmark/run_expander.py +4 -0
  52. helm/benchmark/run_specs/arabic_run_specs.py +134 -16
  53. helm/benchmark/run_specs/bluex_run_specs.py +1 -1
  54. helm/benchmark/run_specs/classic_run_specs.py +2 -2
  55. helm/benchmark/run_specs/long_context_run_specs.py +2 -2
  56. helm/benchmark/run_specs/medhelm/__init__.py +0 -0
  57. helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
  58. helm/benchmark/run_specs/medhelm_run_specs.py +360 -50
  59. helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
  60. helm/benchmark/scenarios/air_bench_scenario.py +21 -0
  61. helm/benchmark/scenarios/alrage_scenario.py +54 -0
  62. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
  63. helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
  64. helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
  65. helm/benchmark/scenarios/aratrust_scenario.py +19 -0
  66. helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
  67. helm/benchmark/scenarios/bbq_scenario.py +15 -0
  68. helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
  69. helm/benchmark/scenarios/bluex_scenario.py +6 -2
  70. helm/benchmark/scenarios/bold_scenario.py +15 -0
  71. helm/benchmark/scenarios/boolq_scenario.py +20 -0
  72. helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
  73. helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
  74. helm/benchmark/scenarios/clear_scenario.py +23 -0
  75. helm/benchmark/scenarios/cleva_scenario.py +479 -0
  76. helm/benchmark/scenarios/code_scenario.py +28 -0
  77. helm/benchmark/scenarios/commonsense_scenario.py +26 -0
  78. helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
  79. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
  80. helm/benchmark/scenarios/copyright_scenario.py +35 -1
  81. helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
  82. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
  83. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
  84. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
  85. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
  86. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
  87. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
  88. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
  89. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
  90. helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
  91. helm/benchmark/scenarios/disinformation_scenario.py +22 -0
  92. helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
  93. helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
  94. helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
  95. helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
  96. helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
  97. helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
  98. helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
  99. helm/benchmark/scenarios/gpqa_scenario.py +18 -0
  100. helm/benchmark/scenarios/grammar_scenario.py +20 -1
  101. helm/benchmark/scenarios/gsm_scenario.py +15 -0
  102. helm/benchmark/scenarios/headqa_scenario.py +22 -0
  103. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
  104. helm/benchmark/scenarios/ice_scenario.py +21 -1
  105. helm/benchmark/scenarios/ifeval_scenario.py +18 -0
  106. helm/benchmark/scenarios/imdb_scenario.py +15 -0
  107. helm/benchmark/scenarios/koala_scenario.py +21 -1
  108. helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
  109. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
  110. helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
  111. helm/benchmark/scenarios/legal_support_scenario.py +13 -0
  112. helm/benchmark/scenarios/legalbench_scenario.py +20 -0
  113. helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
  114. helm/benchmark/scenarios/lextreme_scenario.py +11 -0
  115. helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
  116. helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
  117. helm/benchmark/scenarios/math_scenario.py +26 -0
  118. helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
  119. helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
  120. helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
  121. helm/benchmark/scenarios/med_qa_scenario.py +14 -0
  122. helm/benchmark/scenarios/medalign_scenario.py +23 -0
  123. helm/benchmark/scenarios/medbullets_scenario.py +22 -0
  124. helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
  125. helm/benchmark/scenarios/medec_scenario.py +23 -0
  126. helm/benchmark/scenarios/medhallu_scenario.py +23 -0
  127. helm/benchmark/scenarios/medhelm/__init__.py +0 -0
  128. helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
  129. helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
  130. helm/benchmark/scenarios/medi_qa_scenario.py +23 -0
  131. helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
  132. helm/benchmark/scenarios/mental_health_scenario.py +23 -0
  133. helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
  134. helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
  135. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
  136. helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
  137. helm/benchmark/scenarios/mmlu_scenario.py +15 -0
  138. helm/benchmark/scenarios/msmarco_scenario.py +30 -0
  139. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
  140. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
  141. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
  142. helm/benchmark/scenarios/narrativeqa_scenario.py +20 -0
  143. helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
  144. helm/benchmark/scenarios/omni_math_scenario.py +18 -0
  145. helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
  146. helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
  147. helm/benchmark/scenarios/quac_scenario.py +14 -0
  148. helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
  149. helm/benchmark/scenarios/raft_scenario.py +15 -0
  150. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
  151. helm/benchmark/scenarios/scenario.py +31 -0
  152. helm/benchmark/scenarios/seahelm_scenario.py +348 -0
  153. helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
  154. helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
  155. helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
  156. helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
  157. helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
  158. helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
  159. helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
  160. helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
  161. helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
  162. helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
  163. helm/benchmark/scenarios/situation_prompts.yaml +49 -0
  164. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
  165. helm/benchmark/scenarios/summarization_scenario.py +37 -0
  166. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
  167. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
  168. helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
  169. helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
  170. helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
  171. helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
  172. helm/benchmark/scenarios/the_pile_scenario.py +13 -1
  173. helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
  174. helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
  175. helm/benchmark/scenarios/vicuna_scenario.py +21 -1
  176. helm/benchmark/scenarios/wikifact_scenario.py +20 -0
  177. helm/benchmark/scenarios/wildbench_scenario.py +18 -0
  178. helm/benchmark/scenarios/wmt_14_scenario.py +12 -0
  179. helm/benchmark/static/schema_arabic.yaml +55 -12
  180. helm/benchmark/static/schema_long_context.yaml +17 -17
  181. helm/benchmark/static/schema_medhelm.yaml +36 -0
  182. helm/benchmark/static/schema_slp.yaml +219 -0
  183. helm/benchmark/static_build/assets/index-671a5e06.js +10 -0
  184. helm/benchmark/static_build/assets/index-9352595e.css +1 -0
  185. helm/benchmark/static_build/index.html +2 -2
  186. helm/clients/audio_language/llama_omni/arguments.py +61 -0
  187. helm/clients/audio_language/llama_omni/constants.py +9 -0
  188. helm/clients/audio_language/llama_omni/conversation.py +213 -0
  189. helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
  190. helm/clients/audio_language/llama_omni/model/builder.py +88 -0
  191. helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
  192. helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
  193. helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
  194. helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
  195. helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
  196. helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
  197. helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
  198. helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
  199. helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
  200. helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
  201. helm/clients/audio_language/llama_omni/preprocess.py +295 -0
  202. helm/clients/audio_language/llama_omni/utils.py +202 -0
  203. helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
  204. helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
  205. helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
  206. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
  207. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
  208. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
  209. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
  210. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
  211. helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
  212. helm/clients/openai_client.py +31 -19
  213. helm/clients/openai_responses_client.py +27 -3
  214. helm/clients/openrouter_client.py +31 -0
  215. helm/clients/test_openrouter_client.py +69 -0
  216. helm/clients/together_client.py +48 -11
  217. helm/clients/vertexai_client.py +8 -2
  218. helm/config/model_deployments.yaml +75 -1
  219. helm/config/model_metadata.yaml +70 -2
  220. helm/config/tokenizer_configs.yaml +19 -1
  221. helm/proxy/example_queries.py +8 -8
  222. helm/proxy/server.py +2 -1
  223. helm/proxy/static/index.css +4 -0
  224. helm/proxy/static/index.js +7 -1
  225. helm/benchmark/metrics/aci_bench_metrics.py +0 -14
  226. helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
  227. helm/benchmark/metrics/dischargeme_metrics.py +0 -14
  228. helm/benchmark/metrics/med_dialog_metrics.py +0 -14
  229. helm/benchmark/metrics/medalign_metrics.py +0 -14
  230. helm/benchmark/metrics/medi_qa_metrics.py +0 -14
  231. helm/benchmark/metrics/medication_qa_metrics.py +0 -14
  232. helm/benchmark/metrics/mental_health_metrics.py +0 -14
  233. helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
  234. helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
  235. helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
  236. helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
  237. helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
  238. helm/benchmark/static_build/assets/index-b9779128.css +0 -1
  239. helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
  240. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/WHEEL +0 -0
  241. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/entry_points.txt +0 -0
  242. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/licenses/LICENSE +0 -0
  243. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.8.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- crfm_helm-0.5.7.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
1
+ crfm_helm-0.5.8.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
2
2
  helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -13,7 +13,7 @@ helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_
13
13
  helm/benchmark/reeval_run.py,sha256=vImL8JNhveEOftZbRQ6JAxF0L-XCKIwh65M6fIYo4RU,7198
14
14
  helm/benchmark/reeval_runner.py,sha256=bJPl7XVOVwK2fUA7voOVQYwVFEOfKVnrT2tbSGQzQY8,15584
15
15
  helm/benchmark/run.py,sha256=ZyqkKnqkMqM2AH4HL6sH72H8-mrDWu0NW0piE7BY0HM,13973
16
- helm/benchmark/run_expander.py,sha256=hKFLpmq8W2KBl_mBf-ahHEbt67qZFgu-VxjvidOeQuE,56543
16
+ helm/benchmark/run_expander.py,sha256=IMPhg16Yd3diaFRLGYcLCXGO4L_B2WXW69oZP0fx6lE,56857
17
17
  helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
18
18
  helm/benchmark/run_spec_factory.py,sha256=Hxeft3fXoWNz9yGo-2nIfb5pd3GDWlwYWc6YYvAkTjM,7785
19
19
  helm/benchmark/runner.py,sha256=O-91eRRrNgE4_tlCVeLq9_0QsRfNELvaQT-KWtJw894,14618
@@ -25,7 +25,7 @@ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5
25
25
  helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
26
26
  helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
27
27
  helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- helm/benchmark/adaptation/adapter_spec.py,sha256=WrDOvQoeV5Ciw2bmvtnz6HTCAEfjCHTYgfTZwRZzkN8,5680
28
+ helm/benchmark/adaptation/adapter_spec.py,sha256=mfqU5lkvN2UOOUrldgTNq_u8iqRajagvzimyGWQhPQs,6054
29
29
  helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
30
30
  helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
31
31
  helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
@@ -40,7 +40,7 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=LI7uWpKIHvTUjGiy
40
40
  helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=8LepCkI5b0MOL70pRPGb7vEH0KFMxIlpCQIVIzQT_vE,15030
41
41
  helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=u_GFEgg5wmpate-s5U5aMsmcHuFmreJcA8J0TO1kPCc,14907
42
42
  helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=-fY4mvzoGCCoR0HesT_xf2U2m2arVjgDuj59lm07_tg,1923
43
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=lzmHwvDOHWl9IWC3NTLGfJDbduXtK_zrS2_YoUQmdc8,4464
43
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=nOCuX9lFKb3BHpznhTwpNCO0YsZBNhcMYuFnsLT_u-s,4579
44
44
  helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py,sha256=RV6B3i5juBbJCtPDWzSfma49YXeDq3vQAQ5xQwnH-cA,3282
45
45
  helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=hhH9ehK092j1WdUwrKYSy5PvNJ73gsIu6-5W8aLoYVI,2190
46
46
  helm/benchmark/adaptation/adapters/test_adapter.py,sha256=7Nr6kMK3JN0UjMjjZ6P1fsD5xhOeaqh0D1xI6LFKCos,641
@@ -51,12 +51,13 @@ helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_T
51
51
  helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=MvE7YdIt8Y0nefXLskY9gPmXp7QWi2b8cqg8fxUpzbM,1980
52
52
  helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=KXP9MzDdmUao3uVjPgZYKjZQ_LvGHgZvI-86o3E87xA,6404
53
53
  helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py,sha256=jyL61UxBsIr68hUz-jtjBUnyB2HBp5ESNyECGp_Gf6Q,2129
54
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=ftwSOTPugDuw8vh2WaQDJb0tQAeWR7S7qtD4yE_nOt4,4804
54
+ helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=GP2Fg1kW0-5jCkjgzVkhuN7YBQFyFgQpPTfpSgfbAvk,5178
55
55
  helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=mjjyn9p31V-yt6S8BX7SvqvkQ56D9cKSff6d-daM6HM,10250
56
56
  helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=6nuz0Vn89A1mOedutsiq2SwTOG3qn8dUZTiaXhKffiw,3587
57
57
  helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- helm/benchmark/annotation/aci_bench_annotator.py,sha256=SjXidlbpm5HOhdhNXg3HjabMEQvt3hq1iJ5GPajxt8M,3228
58
+ helm/benchmark/annotation/aci_bench_annotator.py,sha256=aAzXqbjj_3bv0-ATCrFu4JvrsqORE5lkYpgxtXAEGSA,2777
59
59
  helm/benchmark/annotation/air_bench_annotator.py,sha256=CDyHVwD4eoymfLduJC5WvvhDX1DOgYBqgjvqBjoCfU8,3501
60
+ helm/benchmark/annotation/alrage_annotator.py,sha256=3DcHbD8WXTg5PN3feipHTsFls0v5owMyb_rqpNWokls,3531
60
61
  helm/benchmark/annotation/annotator.py,sha256=__BkMVpAEpSs1pbwPK5sVWLdCAXnjsHcPYgmOqmNPu0,1843
61
62
  helm/benchmark/annotation/annotator_factory.py,sha256=8uo5uz1UpIVCHUd7CRvmy6b9XB1gspdHmgxH5UZMPVI,2335
62
63
  helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
@@ -65,28 +66,28 @@ helm/benchmark/annotation/autobencher_safety_annotator.py,sha256=w_xjZmY1zuLjVvV
65
66
  helm/benchmark/annotation/bigcodebench_annotator.py,sha256=CJG2pn1DeHJCp3yHETRquNIkCHfd6ZNuOiUjG1cQ_JY,4448
66
67
  helm/benchmark/annotation/bird_sql_annotator.py,sha256=FQDZs1-O1jfJOET0eDeU7lf5xLaiMPohC5BdmQ4XkzI,2436
67
68
  helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
68
- helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=6ybNBvJi59i0cpAhI_fLwXoSnqhAH6m7Lo6ad_PufBs,2966
69
+ helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=R6Hexh20T6WBBRBhwLhQv_IQvW7Z55Pf9IYBCWxUTaQ,2517
69
70
  helm/benchmark/annotation/czech_bank_qa_annotator.py,sha256=YIH5g4zHe3BQF2Y-6uRVw7g9u_SPBncqBobdvZdIzyA,3096
70
- helm/benchmark/annotation/dischargeme_annotator.py,sha256=Z6xnUK1cNrFco9x0w8B_qhlLOEZrzXBwT6TKZPKoPBk,3676
71
+ helm/benchmark/annotation/dischargeme_annotator.py,sha256=blP76BgwmbHDDDRdaaGwtTHfukCvXXLN72vjGj_LI_U,3225
71
72
  helm/benchmark/annotation/ehr_sql_annotator.py,sha256=Izpq0biZ9lkJOPk6NwTuv2wk8Bg88vj56BKZrY8XhT4,4021
72
73
  helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
73
74
  helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
74
75
  helm/benchmark/annotation/helpdesk_call_summarization_annotator.py,sha256=I7TjpN502Sa-Z4uUKemJXSAdOiVA3MMO92YIAAXeDBg,6034
75
76
  helm/benchmark/annotation/live_qa_annotator.py,sha256=PSff59mU_t3ypmptYsYRKU3m1vMLF0dMyUySIOxBrPw,3553
76
- helm/benchmark/annotation/med_dialog_annotator.py,sha256=OVTFIlvdhcOr_hdK0tnrDes9hYdN1mDWFTp4GDYY7O0,3162
77
- helm/benchmark/annotation/medalign_annotator.py,sha256=8edAZh8oQgDKUT1bQ3Hp2NBE-QnBZ_-ZQjHkV7YKWhs,3240
78
- helm/benchmark/annotation/medi_qa_annotator.py,sha256=v8e6hkHZX1x9KtTedCnpCseh-Y72z5kUgUrXHWPUkX8,3074
79
- helm/benchmark/annotation/medication_qa_annotator.py,sha256=uZ3VpJ0nsDyF70_kn8kSSBPr4OlfiNdZC7q8wq_jJFE,3090
80
- helm/benchmark/annotation/mental_health_annotator.py,sha256=JwgSeXtwf4KFZxNtAxsnqdLJQSvP-F-ZoCcCWdasrMQ,3275
81
- helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=pwwniNlu5VTa1ZdyO0KFcMFZcpqM5CjguujgSpEGslw,3174
82
- helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=zABO1FJH9pOFhUe5vc2B-c14Hf5RsuU9jQAGiMg6G0I,3204
83
- helm/benchmark/annotation/model_as_judge.py,sha256=FIJOUzIhf2QpxqFf6hjgAM5hPEm0VlXzB-jiHJUrPDs,11985
84
- helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=qqWHY2HfCwMP5GqvObS3JpMIYVs4yyITCsA1B7lcDks,3201
85
- helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=TUxNzJcItErsw0gw76hiKZAWeQTNHGHnC0qf-_CGeF0,3316
77
+ helm/benchmark/annotation/med_dialog_annotator.py,sha256=uGp8d74WGgOOiexpoKj5CMdr5jOvAnfe-ZLKGSHT6ng,2711
78
+ helm/benchmark/annotation/medalign_annotator.py,sha256=glAPpVdIfebm39GhrBY3BE2hdofVBIBXUxPU3_qqZOw,2789
79
+ helm/benchmark/annotation/medi_qa_annotator.py,sha256=bLXxXe-obPvud15sPrqp9i-wSq1QqguCPt_UJaXRz_I,2623
80
+ helm/benchmark/annotation/medication_qa_annotator.py,sha256=98XU2VVSoQ8XlAkuVKWnNBOS76X_lIviq_A-nyrlqcw,2639
81
+ helm/benchmark/annotation/mental_health_annotator.py,sha256=08b_XqgfSpIhutDUaaSgVRdiZB6metAQQ_WHF8U2-c0,2824
82
+ helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=a9AHMFY2shV4I2qVUfKnOvZFbmQjL5vPKsbytTBfU0A,2723
83
+ helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=eu9rZhRAXVbo0j7BP7vuAKwGkuwhTCvVRvJ4dPbcR4I,2753
84
+ helm/benchmark/annotation/model_as_judge.py,sha256=eZZlyCrW6U9a8bHhaPrbV1AJ23q3uP0ho1NbVErGBXs,12160
85
+ helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=ZgJVtNpab3BrMs0ZXFW6L0CNp1Hcqfgv7FHP4rpxFPg,2750
86
+ helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=VtHiEGFZLUsd3zkgnSoti5itZnDPgERMPZlORkEp7ok,2865
86
87
  helm/benchmark/annotation/omni_math_annotator.py,sha256=PvZZb1oGw60qT-oHRIs93AZbh5wTbpsmD8BforudFhA,6144
87
88
  helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
88
89
  helm/benchmark/annotation/spider_annotator.py,sha256=B48ylGg5J7xuTSUio7VztdXk3lI6ilMqrUvAD-ve0sE,621
89
- helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=5jU-dK_0OvB_jXNLDZtQ5E3gaSUcAxFNzv6prA17eAg,3186
90
+ helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=Te9rQhcUV-T2I4oBCBzInAZW65EV3lv0LXLPgGzLd8c,2735
90
91
  helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
91
92
  helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
92
93
  helm/benchmark/annotation/wildbench_annotator.py,sha256=OXR59zdKw9W7v3Q_sFnt1cEPN3nOzQDVqSbh4jDbEUs,5457
@@ -126,17 +127,16 @@ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8
126
127
  helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
127
128
  helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
128
129
  helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- helm/benchmark/metrics/aci_bench_metrics.py,sha256=fAuTm8Sr1vvyd7Tjcz9WWKrFkqrwCV-CiF6lqUO3dKU,442
130
- helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
130
+ helm/benchmark/metrics/air_bench_metrics.py,sha256=WvfjjHLSE567Y7BC8tGlMINBwP-d1URRUZcMUF1yf1g,171277
131
+ helm/benchmark/metrics/alrage_metric.py,sha256=4QHtL00aEIRYQx2QkDs5uldu7ZAkbFYMALH6DL9LSJg,1233
131
132
  helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
132
- helm/benchmark/metrics/basic_metrics.py,sha256=d0iwYnwrbF7w7CFtazx8vPIsZnj51U2PVVoscCb-HJA,20495
133
+ helm/benchmark/metrics/basic_metrics.py,sha256=3y1M0mFJL8FlkMkQWWs4ZV2NiriaMGydddbeY3F-vXk,30547
133
134
  helm/benchmark/metrics/bbq_metrics.py,sha256=GeZhSSJzqGD0e5EAiRHitIC3XtPICF7rDI6GfeYQc8E,6201
134
135
  helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
135
136
  helm/benchmark/metrics/bias_word_lists.py,sha256=eyk6we2J4SW8ZaZxQUWLB7Yapn92uM5TCekhFB5vg-U,13908
136
137
  helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
137
138
  helm/benchmark/metrics/bird_sql_metrics.py,sha256=ooCuXW5nPpRs_-4seCONQmn25DzTbcUgGXznXTK9y0Y,1153
138
- helm/benchmark/metrics/chw_care_plan_metrics.py,sha256=WOAdwuF4vusZhjaXSAB3r7PD_ZxeNmVu2oAmOqzVLtU,460
139
- helm/benchmark/metrics/classification_metrics.py,sha256=1Xa_bO4PqIAV2iZitE69kc4VKS4A7PloG5ElZAgvmh8,8851
139
+ helm/benchmark/metrics/classification_metrics.py,sha256=CfkyMiiWo74VbIB7eEhNxIcPbGA_imbzETrAExqn5WM,9498
140
140
  helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIefAoi_edUwKpp-XxYbmeU,2740
141
141
  helm/benchmark/metrics/cleva_harms_metrics.py,sha256=xVubv2pG3iinVs3namoVHWAmV9oUPywZwFB_0JGhP_w,11277
142
142
  helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
@@ -149,57 +149,47 @@ helm/benchmark/metrics/codeinsights_edge_case_metrics.py,sha256=B7EEELwwH67Vxmgr
149
149
  helm/benchmark/metrics/codeinsights_metric_specs.py,sha256=BkKWII9yTkChdZVsGeeeCbiWQDYvvcAKo0nxi_RTTUk,1798
150
150
  helm/benchmark/metrics/comet_metric.py,sha256=EJWZ9x8CGeDDQlfxYrY-np_NVJBt5gun0XLJvtpjXVI,4798
151
151
  helm/benchmark/metrics/common_metric_specs.py,sha256=JKqmO4ovBdfOYKC-00OSzOMv--g9NTCVfUHLaz-1Uns,6025
152
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=Zrf6HyH_WNe7gGFgW0j8FJlX5KZvbk-05iX8QFPJDyU,2656
152
+ helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=F2bfg8XbjH3WOQ0O_c5S7UUxgpzu7AD5wRtNdNcJlUs,2997
153
153
  helm/benchmark/metrics/copyright_metrics.py,sha256=RYOWKFN97UCD2Vj51gzKGbnnY9wAq6KJgiRt2cecVfs,7824
154
154
  helm/benchmark/metrics/czech_bank_qa_metrics.py,sha256=bKoooK2T5v_fFKNbUnsuW6Mv9muAirJD5lTrzuHfpz8,1113
155
155
  helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=x66XP0iQGk4ThT7ddmrlLCA0XF4arRbQMDT42LHf2kE,3297
156
156
  helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py,sha256=TxTkkWdx6d6ym0MirZTiucl_TWFdn4uJLnlTfLjQvgk,2925
157
157
  helm/benchmark/metrics/decodingtrust_privacy_metrics.py,sha256=OU7lka-hm6PubR5Gjj4uNyrqhjlfhe0mmjBCAz9vlRs,3456
158
158
  helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py,sha256=bW4zafRyKFa__8fGrdiTPUu848ovNnvakLCfqcMrcHk,6461
159
- helm/benchmark/metrics/dischargeme_metrics.py,sha256=D8LI52E17hNSPDpEvb2tw1za4QWDE3p9xgx7Nm9l7_Y,454
160
159
  helm/benchmark/metrics/disinformation_metrics.py,sha256=5n8wgRBb6FaDjqe1nR3Cj9aS48esmMsIUq4KpBHoQoU,7870
161
- helm/benchmark/metrics/dry_run_metrics.py,sha256=Ss0lzf944HIbL1CX6QuJpGFPqOzhBT0qVWLNR1BoEjk,3784
162
- helm/benchmark/metrics/efficiency_metrics.py,sha256=SJqpA1d_GfBPl9H6moai8ra1GVe7tlaCfg3PeiWT54c,11845
163
- helm/benchmark/metrics/ehr_sql_metrics.py,sha256=YRjvPIty7zlyoyGD6wo3HYOz7y_PThySOZzVRJ38iww,4797
160
+ helm/benchmark/metrics/dry_run_metrics.py,sha256=ouS6_8lESuCGSQgegN4xKKyoGr7Rb1K-dufHPT1fDwc,4886
161
+ helm/benchmark/metrics/efficiency_metrics.py,sha256=VnM5PgxxK6UKk9MzPprnN_7d-t6xVlIgFMQYrFh8dwY,15262
162
+ helm/benchmark/metrics/ehr_sql_metrics.py,sha256=yyz-2tsk4Fu6D5ELp3cbLaAWGjqtDGrUdvFvgHvxevg,7418
164
163
  helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
165
- helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=T3pftPfYEUR88NEZEZuzYOTNoHELo7nSbz4qmxN8oQc,19628
164
+ helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=T1AUnN1wYFrTBMLyys3AbvlArIenZwCPwHa_F7J9ODg,31476
166
165
  helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
167
166
  helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
168
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=HRRKkcTbCu5ScOVwmjzYaA7UAEGE_AJUZVOCDRuv4Po,4321
167
+ helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=Lkil9DRtO3NS3zr5Ef_qqGxZBL-ObCNpbKoJvMhCrb8,4762
169
168
  helm/benchmark/metrics/gpt4_audio_critique_metrics.py,sha256=L9tGFwvl1-Ew3MdInQ7KPa8OlI5YexIB2KuCYVYsuPY,7023
170
169
  helm/benchmark/metrics/gpt4_audio_refusal_metrics.py,sha256=vYPRJq-4uNhUWUWMrDkpHmfIBkhEyAgaMNEI6RKPP80,5896
171
170
  helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
172
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=9-kB3NeBacI6nxs2oQ7Km_1SHyiz98UVZuR8PAlvCHM,1442
173
- helm/benchmark/metrics/ifeval_metrics.py,sha256=4_Vp9bNnrctKtv6xZ1RpvBstPAZPwv1xiohH-ogs99U,2565
174
- helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=RR9cMIG113oXUnBjU_denn7DaCGB11k1oGtQ5dQON3M,9874
175
- helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=1GsW-nBz8TgP4wFIVEGA4_BhI17kihmk96zuLpD4NZc,4636
176
- helm/benchmark/metrics/language_modeling_metrics.py,sha256=yS7k8iFjxfkckSBA0RVA7VdOivSEBtNzCjczK6We7y0,4598
177
- helm/benchmark/metrics/live_qa_metrics.py,sha256=f2XFmQaohjQNqYqNg8NcDVavCzyP4cd8Cl8rLArn9EM,816
178
- helm/benchmark/metrics/llm_jury_metrics.py,sha256=yzAsdacyX0MFJy2qKIjhI0y7JvtflELpCh6R14wuCgk,1704
171
+ helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=5Z43F9ZI9OHBxeZENBGSE4fB1YTo1NKOquPt_Sw-F5s,1835
172
+ helm/benchmark/metrics/ifeval_metrics.py,sha256=33IqTVdYlX9ZI6sR-FfFAKbVJ9tAGDNqZpLHS5yInio,3036
173
+ helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=AK_ZpayimVZ9MxX8CJG-K1uPKo2j1dNJ_H9uSz1CWiY,11612
174
+ helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=rnvVlvFgWwaavaIu9n8iVlODhkk2g3liOiK7kwfGbN8,5474
175
+ helm/benchmark/metrics/language_modeling_metrics.py,sha256=NK8vYLFyFAidDG8UXVkP242zbg_6W6EZ4xZPNbokGlw,5001
176
+ helm/benchmark/metrics/live_qa_metrics.py,sha256=YGodrQ-b9ucQTK3ICKXRla5r26RR0wxC4iPOTcYrV1k,1195
177
+ helm/benchmark/metrics/llm_jury_metrics.py,sha256=-5w8tFG4JE0cMcH3KS7xQ1z6mbdtDf7reCMz6u5vtag,2158
179
178
  helm/benchmark/metrics/lmkt_metric_specs.py,sha256=0Fa0xLjQDXwsRCE5VqGzEfb5ZdzKsDoSCwR_zHogFcc,376
180
179
  helm/benchmark/metrics/lmkt_metrics.py,sha256=GaZTfl-NQXa1YSzcJUGlZ5wZURH1CnJxGkPFBj8ydTQ,1856
181
180
  helm/benchmark/metrics/machine_translation_metrics.py,sha256=22vaGBCSw12uM1wmtDG-MBBZW8OiTZwNPaerjckdtDE,3860
182
- helm/benchmark/metrics/med_dialog_metrics.py,sha256=kzmrkQcmJ15zuOF9_Onk9N0oeNeyl9Rri1JEb1AqRT4,447
183
- helm/benchmark/metrics/medalign_metrics.py,sha256=q6l8p5Pie-H9pxhaA-lQkSOnliJWXr6zUeN8syEQ91Q,439
184
- helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=9wZgg20-9QBNk0_XhuwR3LT940fqDPkCM4Kl0dPkbAs,5353
185
- helm/benchmark/metrics/medec_metrics.py,sha256=hNBOGX52G_QOmgTCp9LnIMrmGSRxbb5vgjxKU069TMQ,4152
186
- helm/benchmark/metrics/medi_qa_metrics.py,sha256=JWAEMuT0UXDZrb7qHn13W6W79ilbprk492V_9vWrB4s,432
187
- helm/benchmark/metrics/medication_qa_metrics.py,sha256=wit3nKNWpGFfgauu6Xye2IDTePAS0VHAQI_7OO9HR6M,462
181
+ helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=2viECYEj8y65_w5MPH295Z1OgLTNrgP_iMzzYSgc2hQ,5895
182
+ helm/benchmark/metrics/medec_metrics.py,sha256=5z3HKZCEuQsOix-22PPzTHhWlYmjyHOAVFV-bgGUVJE,5137
188
183
  helm/benchmark/metrics/melt_bias_metric.py,sha256=mHDCkRGLD-0pyJA_depi_KX3sn7g7Bgd3_m0XdLQahY,11520
189
184
  helm/benchmark/metrics/melt_bias_word_lists.py,sha256=xA0araUdszAIOqfxiTi6MIJhKYwr_Gwsc1L9qinZx9U,27891
190
185
  helm/benchmark/metrics/melt_metric_specs.py,sha256=zaeV57LQEl8qK7be36NaojiUJlzmkoKY8JyOkOVuPqs,1619
191
186
  helm/benchmark/metrics/melt_toxicity_metric.py,sha256=ni6bb_QC51NM5jQpbFYLWtsQy3tNOLwQ_5b3PDV5vVk,4193
192
- helm/benchmark/metrics/mental_health_metrics.py,sha256=4HXCXl2GxFPn6wDzHptHeBTuP4BJVLUzEUKffpd5R_k,462
193
- helm/benchmark/metrics/metric.py,sha256=jqQyiKDq_pQv-ulGqfZI56ydRDQs3N3XhfHIPysUhrk,14311
187
+ helm/benchmark/metrics/metric.py,sha256=gF7KlWPoPIGUvbvqDeXagBNBZnl8rclh8JfgCPvuXvs,15065
194
188
  helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
195
189
  helm/benchmark/metrics/metric_service.py,sha256=bJaM7GisEgSWR3vPTcg7b67XF9X2K5viODacIgbGb24,1692
196
- helm/benchmark/metrics/mimic_bhc_metrics.py,sha256=da1YYrE8fL3YHeIJ9hf4WCKZtuj_8cksm3rJ24rcy70,442
197
- helm/benchmark/metrics/mimic_rrs_metrics.py,sha256=x3vSj1VG1UkNF3gbgJYDeA4z-crxfGIkK7iZo0xjq8c,442
198
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=Pu9efXoBrhsvxSeGHqwbUA5k365-pJTeXpMNhmcg0L0,3927
199
- helm/benchmark/metrics/mtsamples_procedures_metrics.py,sha256=XrddVk-gnc8jF8amCI1RBa_XTS9yEXD2Y9Ld9W7Q-m8,497
200
- helm/benchmark/metrics/mtsamples_replicate_metrics.py,sha256=rmH34aTX_wZWxLi4jrxf3sR1RIqNRF0QDANLRQUGhqM,492
190
+ helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=3kypTnrkbdG-Dpdbg_A_WQYVx35ylvZFjh2-R5wvhSE,5347
201
191
  helm/benchmark/metrics/nltk_helper.py,sha256=QMEps-lqJZ_pCgvjlMf4BvC0pzDu3ez5jit5F4p8dAk,1313
202
- helm/benchmark/metrics/omni_math_metrics.py,sha256=Gqih87UrE93-a0hbRhTBkjmfGLNTkuKQGaG-sTQeuG8,1287
192
+ helm/benchmark/metrics/omni_math_metrics.py,sha256=WF0cWpmJwduTdZw7c_O5QsXDNwet5GgHYV0Ww9PfKc8,1709
203
193
  helm/benchmark/metrics/openai_mrcr_metrics.py,sha256=TAop7G50FKaR-Jyo2EGLqmMOfJRmS2vNRDFiifa6mhg,2313
204
194
  helm/benchmark/metrics/output_processing_metric.py,sha256=ey9UBi2f3780OwFlp82ymzfjLR3MA2fpA9vW5R4W5TA,2581
205
195
  helm/benchmark/metrics/output_processors.py,sha256=ULZlDBOf6NupAXzDKBKyTDdgPZ5PSxOAlOYTbrQEek8,472
@@ -210,23 +200,22 @@ helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9
210
200
  helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
211
201
  helm/benchmark/metrics/ruler_qa_metrics.py,sha256=OuiA0ksByl0Tw1Oal7zbedhKjTrhJgQJDLXAgoTLXuc,1473
212
202
  helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
213
- helm/benchmark/metrics/seahelm_metrics.py,sha256=egRkeXnnb8Nqi9qJJMDXJRSl4NK6WvdUxAc_LffBips,6964
203
+ helm/benchmark/metrics/seahelm_metrics.py,sha256=GlNoK1O7kcuiuEOJEgTsnrfK9TcGwH7-tPj6Qe6JV90,7493
214
204
  helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
215
205
  helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
216
- helm/benchmark/metrics/starr_patient_instructions_metrics.py,sha256=YHdTeIFdZxRbvqBnlWpAyIsWzZyWAjjDFuKOXhHYiSM,525
217
206
  helm/benchmark/metrics/statistic.py,sha256=ATuOm0jU3L-0ELiZaF2GVMNF22W66-rMvzxRtlfqcII,3446
218
207
  helm/benchmark/metrics/summarization_critique_metrics.py,sha256=-mki8-zvZx54dQg8X0BG2Y6wmfypQhkIuD_9ZjNBl78,4782
219
- helm/benchmark/metrics/summarization_metrics.py,sha256=FJCdGRmlCJX5A-AmbtpGGlGRfNgg5Z8Bo0d9yFiE33E,16876
208
+ helm/benchmark/metrics/summarization_metrics.py,sha256=S99uhtvBtH0UQS-gDEuQLLTPYNG-dNUV1n3OnaOP7p8,22647
220
209
  helm/benchmark/metrics/test_bias_metrics.py,sha256=qEZsCULvwjVdIyfNgJSc2L7Xp9suKKW7L5OuQmGrwZ8,6393
221
210
  helm/benchmark/metrics/test_classification_metrics.py,sha256=CRDMGmVmzEUnNaM0C02qUTOU2AS11Mt2-GdEl89y7lw,9541
222
211
  helm/benchmark/metrics/test_disinformation_metrics.py,sha256=U3ZmS9s33oimTQbKO-7pgWeX_WiDB9chlOCtf_vslXw,2249
223
212
  helm/benchmark/metrics/test_evaluate_reference_metrics.py,sha256=B7xtDDWPAxF7d-vcUx_R51hFMae-DD52nUwbu_eWt6Y,1601
224
213
  helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLpwut37DmA,771
225
214
  helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
226
- helm/benchmark/metrics/toxicity_metrics.py,sha256=ZLOzxDlMgbljl-9y6vT2ZgwdhsBZ4MfV-T66VpKk00U,4114
215
+ helm/benchmark/metrics/toxicity_metrics.py,sha256=s5Ypodu4cBmIc_fCbbQ9kCqcvVJf-OQ6zAvb85r8Cv8,5509
227
216
  helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
228
217
  helm/benchmark/metrics/unitxt_metrics.py,sha256=8fawxnrg0xsAe0xO2wbL7S_yisj8RzJnrn6xtk8C6q8,4852
229
- helm/benchmark/metrics/wildbench_metrics.py,sha256=sY7MNTzRlJJK3yph3rCijgbMaajtLyCCquThlsoE5wU,1380
218
+ helm/benchmark/metrics/wildbench_metrics.py,sha256=THOguxE6GUun0zTr-pITXfQGEd664sScrfIzFGdNPXk,2163
230
219
  helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
220
  helm/benchmark/metrics/ifeval/instructions.py,sha256=qNoa1vMPDNz6ORWfyMv_efwKZ4U5zkI-cf4aApyfSqU,53247
232
221
  helm/benchmark/metrics/ifeval/instructions_registry.py,sha256=NprvkRQz0QWaIpJsFp95CQCWsnuY_57ZSqFn2IISDP8,7555
@@ -267,7 +256,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
267
256
  helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=Ir4u8blJWTRtEBogb6u22qCy3JXAIzvx-Th6dSBLfdw,698
268
257
  helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
269
258
  helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
- helm/benchmark/metrics/summac/model_summac.py,sha256=ccOP0z4WEpR26iAzzTWviFfX33Cg9MdpZgKgSRQc9D8,17445
259
+ helm/benchmark/metrics/summac/model_summac.py,sha256=zheAPIJAz5MH6GU1gXpWSc9Q9gouhNzYx92PDd5PUXU,17447
271
260
  helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
272
261
  helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
273
262
  helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=XDZGK8h84F2w_pK8Zjko8ssKZmVxKFqTOuHL0mLBzMY,694
@@ -288,9 +277,10 @@ helm/benchmark/presentation/contamination.py,sha256=07IuIP92vfuI0GwfeNC-i_NZUlF8
288
277
  helm/benchmark/presentation/create_plots.py,sha256=bM6UNzH0Bx8Bv2iKcyMoYp7IwfCZSQob-w_XOOI6r1M,29090
289
278
  helm/benchmark/presentation/run_display.py,sha256=LmY2HES4dU94kRYuUxt-c9LTMDN6MU5CspWTF6rZwDo,12419
290
279
  helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
291
- helm/benchmark/presentation/schema.py,sha256=j3gOhj-okQ4qzYoMh5N3ltsL0OXiOGuB7ydF-SI-Ug4,11229
292
- helm/benchmark/presentation/summarize.py,sha256=_d3gd45eBpx8yMnVq1XgF9D-pPMcpbuwseSZz4giybo,60092
280
+ helm/benchmark/presentation/schema.py,sha256=AMGmEwqxkHoZFkOKD-UVZ8aXwgbafG6KYASsWo6YEw8,11005
281
+ helm/benchmark/presentation/summarize.py,sha256=m3RSw6ogUFasdeZ8xSUh4wKV-nYzVi3iQv-KrrwtDFM,67828
293
282
  helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
283
+ helm/benchmark/presentation/taxonomy_info.py,sha256=pPIFOicis9H1sWeXApfsHHcqZpus1ezukxLQO7Lj2Vg,473
294
284
  helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
295
285
  helm/benchmark/presentation/test_create_plots.py,sha256=1FrJZnPW-5QUQKt_pf4y47uDha4B8wHyY1o5hqhKWhc,1293
296
286
  helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
@@ -299,12 +289,12 @@ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9i
299
289
  helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
300
290
  helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
301
291
  helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
302
- helm/benchmark/run_specs/arabic_run_specs.py,sha256=p5KPvcugJI3ERYhO7Le_aiKOZ4IM2EOvsXEmZE8R4Wc,3014
292
+ helm/benchmark/run_specs/arabic_run_specs.py,sha256=fPAI9GCV_D0BHPcLGSNZN45sAO2d449Gb54iHW1nocc,7399
303
293
  helm/benchmark/run_specs/audio_run_specs.py,sha256=baJz5LZiwWZP3KD0hluKgpidtswzdorQnshX0CoqKAc,23383
304
- helm/benchmark/run_specs/bluex_run_specs.py,sha256=OHweBHS8JC-k9_e5Zq1LUU2FZhJ2P7SDshatX-N15Ls,1798
294
+ helm/benchmark/run_specs/bluex_run_specs.py,sha256=jwrH33YeXqoAex11071XMUwTCKNkoJTQQS7iNoJDLmg,1797
305
295
  helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
306
296
  helm/benchmark/run_specs/capabilities_run_specs.py,sha256=sbqhIj4AoujV45erwoVK61lWdlkjg4qssmGlu0eSr1U,12067
307
- helm/benchmark/run_specs/classic_run_specs.py,sha256=4DA-21Tiz87dQ_iklyrKpfsyTw2f51tbwtRvv3Zs57s,53727
297
+ helm/benchmark/run_specs/classic_run_specs.py,sha256=fe98HhzMkfloKpOZbi_mIMp1Hi-clv22rgWT-EdS0e4,53743
308
298
  helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
309
299
  helm/benchmark/run_specs/codeinsights_run_specs.py,sha256=lz3yysrPjCIiObzrIkRjJsWzkABh9qIXn-o7FSqZPl0,9207
310
300
  helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
@@ -318,8 +308,8 @@ helm/benchmark/run_specs/imdb_ptbr_run_specs.py,sha256=nkW5A_xeD5kCKeJVxsL8RFS8r
318
308
  helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
319
309
  helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
320
310
  helm/benchmark/run_specs/lmkt_run_specs.py,sha256=tNZvlA4mXUX-NBC9enRR90qFLeh8SNGFq701rXmXc18,5376
321
- helm/benchmark/run_specs/long_context_run_specs.py,sha256=A3yhg1IEds7kQWxkRYH7WVkMPouA1xDz28uxpHgwJvE,6229
322
- helm/benchmark/run_specs/medhelm_run_specs.py,sha256=--KgkjVwKt4uyiTebalrbeGV4FB-jGqPciYjFZED7zA,43407
311
+ helm/benchmark/run_specs/long_context_run_specs.py,sha256=wn7yY9rMIBJY30SN-275qg9U49aGPUl4hVZphKYFkBI,6442
312
+ helm/benchmark/run_specs/medhelm_run_specs.py,sha256=bi7sGIx5I7KQXAF_Uj6n_O_DFNgtc496unrVh7UuLcQ,53256
323
313
  helm/benchmark/run_specs/melt_run_specs.py,sha256=729MkALud2wG07yulx9zqAzejdXW_eVGkfF5cQWeGGY,32031
324
314
  helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
325
315
  helm/benchmark/run_specs/multilingual_run_specs.py,sha256=umf8e6ZDgRXiU0G_BPoovj1UZ_dxyrXtIQ7i9WC6USg,2296
@@ -333,35 +323,40 @@ helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak
333
323
  helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
334
324
  helm/benchmark/run_specs/vlm_run_specs.py,sha256=v-eWuDYc8u5HO46isLONPfAWv5zdA1ZOQrdyOvX3vlU,37512
335
325
  helm/benchmark/run_specs/winogrande_afr_run_specs.py,sha256=dhOm8z6Q_ZpnzYKrsS0nEbRQPWs_phkXxmL5pxCJzQA,1853
326
+ helm/benchmark/run_specs/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
327
+ helm/benchmark/run_specs/medhelm/benchmark_config.py,sha256=O1D5N4q1QwzrI1ioAQK815cch6hNoJoaIzzAlJo6GXk,7860
336
328
  helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
337
- helm/benchmark/scenarios/aci_bench_scenario.py,sha256=W8h7eWz9mjR0kRAffKWSnA1Fs8t2l83sPyW8fjPOxWQ,5670
338
- helm/benchmark/scenarios/air_bench_scenario.py,sha256=B6_WMowLFe4gWfnoFA_yrHe0kagbIkZabEnK4kGGqSU,1884
329
+ helm/benchmark/scenarios/aci_bench_scenario.py,sha256=ry22AJdd3lvQuEFdzNf6wXzMyPFn46b0kScrYdpj-nA,6783
330
+ helm/benchmark/scenarios/air_bench_scenario.py,sha256=Ufcpxm5KaXHI2FfK4tdQsURaCSdcWNcXVaNmYkE4bo4,2820
339
331
  helm/benchmark/scenarios/alghafa_scenario.py,sha256=FJXO3W6qYzCgLJMSiJEhpddNcFyR3N5Brh8pATW_9GM,5217
340
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=jnUGbppDGEsbe5xoJjmv7nW_RvwPIYm6cwSULeqk2Fk,5133
332
+ helm/benchmark/scenarios/alrage_scenario.py,sha256=MN-gMQboAaJCasYNg_rLJVgcrk5KZ1WCBN9R_lyRrhE,1499
333
+ helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=EzS8td1lJE1yxEwFtuwTbjHtHm1hGIaur93BKAL_Hm4,6212
341
334
  helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=_OWE33eVRaZI0gmfP7bLd572uOi_6jb39z_J6nkcvfg,3182
342
- helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=mI6ttMFAT3sH_v87qVNxYptqDS2EMUhK0b8vpfePSdY,2807
343
- helm/benchmark/scenarios/aratrust_scenario.py,sha256=G20j6Z-C_6bUJf-bpdyUN23Hb7XK0YtieUprq_5Z5hA,2552
335
+ helm/benchmark/scenarios/arabic_exams_scenario.py,sha256=hv28A2pM66ejrO6oFOgmCx3JIP_nqwdUYvIsfGc0Kew,5359
336
+ helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=xMRWPA16Wn8ONgAeyyHOB95X2SQca7tKUpUP8L5ZNJc,3018
337
+ helm/benchmark/scenarios/aratrust_scenario.py,sha256=ismiWLm1M6JmBgVZ0SoVglaOyFbAlyOHsSsiAv8Np8Y,3125
344
338
  helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=fOCHumFWZa4OJZcTZefJiJbdWsb3zjQnWLJYd10Cctw,2496
345
339
  helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
346
- helm/benchmark/scenarios/babi_qa_scenario.py,sha256=SyM6RP4v08B1PjumkdQnuKrM9L8SyK0bXbx-LgmyTPo,5067
340
+ helm/benchmark/scenarios/babi_qa_scenario.py,sha256=CAmh3GfFjB9Xsuh9K-PUu-2xIFTV0v0YNgWbSuv09Y0,5711
347
341
  helm/benchmark/scenarios/banking77_scenario.py,sha256=dtiM-Q_pMDWqkLi-hgl0tH-aGuDdgHkXgweE1JqrPYs,1883
348
- helm/benchmark/scenarios/bbq_scenario.py,sha256=2A7MX6iMAZHuPpH9RePi9rVBeFRmGdiE6GlqZ5uNdAM,9603
342
+ helm/benchmark/scenarios/bbq_scenario.py,sha256=mVfxztgLI9sFwOYntx0dxElm8RmOb7XQYS9DOfgYjkI,10360
343
+ helm/benchmark/scenarios/best_chatgpt_prompts.yaml,sha256=KZdXj4KUbkwFzgIEXVakMpZLTqJ7rldxNuXVDIdlk-A,31304
349
344
  helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
350
345
  helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
351
346
  helm/benchmark/scenarios/bird_sql_scenario.py,sha256=n5elzanKEX9YclAl2l1y33aCjihTmaw1VF_ZsAU5IaM,3613
352
347
  helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
353
348
  helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
354
- helm/benchmark/scenarios/bluex_scenario.py,sha256=eHAltiFqZ_bS0AVi0kbskTlxJbQXy7Sqj6E9nZPWqCc,2500
355
- helm/benchmark/scenarios/bold_scenario.py,sha256=iE9drB9IeXfRn3xvLnaQi3-nJAp-bV1RE0GJGnp9dJc,4130
356
- helm/benchmark/scenarios/boolq_scenario.py,sha256=wPETIu5jcI4jgP5GoFa_xi4SsvHtS9gxQ5TD8neHmdk,8037
349
+ helm/benchmark/scenarios/bluex_scenario.py,sha256=K4ob5_rd1hTOzlPJjuEvujcOdt_Ybgxj3jqj_BYjA9o,2599
350
+ helm/benchmark/scenarios/bold_scenario.py,sha256=MsXwUiJgZgFyVxh-E5gAagi4aPGicDe2C0xct5lQYwA,4882
351
+ helm/benchmark/scenarios/boolq_scenario.py,sha256=qQyJ0BdljChX9U_eEETdFyWLCSQvI0D4NrY6zOCXPh8,8824
357
352
  helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
358
353
  helm/benchmark/scenarios/casehold_scenario.py,sha256=QSe0D3KQJhlTOo6kM9OHwdKy6NlclsFGRVCAB3mTG7s,3174
359
- helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=BbEjDqa4C5wpdil5jIb1nzj16CCZ29hKoZVsfapSfho,4005
354
+ helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=PE4vbj0y39674UIIdH6mgUwSKe4wW_XqRrNsksrwQRs,5104
360
355
  helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
361
- helm/benchmark/scenarios/civil_comments_scenario.py,sha256=pnZU2U_cYFYOJmlmwTehHU5oLIPx_Yg8Ayxinroh4IQ,4875
362
- helm/benchmark/scenarios/clear_scenario.py,sha256=yGdPxWO6vY4JHNa4xywtvD-9lOn6s5cr3njpZyFA0D0,6183
363
- helm/benchmark/scenarios/cleva_scenario.py,sha256=n-h2urZ06GUOuAC_60HMwspTTpBFid72Fx8eZGQppdA,57988
364
- helm/benchmark/scenarios/code_scenario.py,sha256=lSbZWw67ie9osOjXDZukj3EEZGa3L6TrMvTg--IbuxE,12520
356
+ helm/benchmark/scenarios/civil_comments_scenario.py,sha256=N1ZmQyKXkRjRXKPTyEHOpbDhBkjcY8WyHPKMWaBl2qo,5481
357
+ helm/benchmark/scenarios/clear_scenario.py,sha256=cLFlcWKUT1Uy6bYDnAjf1ySR06mK16NhN1AtsaEBZs0,7226
358
+ helm/benchmark/scenarios/cleva_scenario.py,sha256=WQDiDCVo6bhtI926_p3uvr1WhIAkBU1gLNLA5viEwMw,78127
359
+ helm/benchmark/scenarios/code_scenario.py,sha256=tdki0m59NzN4YOm1pMfaSkUP5uUDeTNMqUAB84p5QGI,13953
365
360
  helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py,sha256=KC-5MQ-d8Nn46aDN4FaPxmd6yk1DtVUmVR-CIZsNCp4,1738
366
361
  helm/benchmark/scenarios/code_scenario_helper.py,sha256=TnXAlY-wdAFwIDylFItf0z7HOu93WD6dNThwzZYe330,5904
367
362
  helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py,sha256=PK4wtuBXs4cPPwOoGfhBA4J4cGLQYC_MvRWuvWrkrv8,9068
@@ -369,84 +364,88 @@ helm/benchmark/scenarios/codeinsights_correct_code_scenario.py,sha256=7BpcezugYH
369
364
  helm/benchmark/scenarios/codeinsights_edge_case_scenario.py,sha256=csTwe-mv1f6Tyvnj9uZ0SYuj1GRVvgjzukV28gIhNpk,8703
370
365
  helm/benchmark/scenarios/codeinsights_student_coding_scenario.py,sha256=wc5Fefn4jpCw03dQ6WswCztJ8AO5j0Vrn6omcOVUq2k,7409
371
366
  helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py,sha256=qX3yckZdMojYhiwvokvEuQpRXOzmN2zmzKjQb96Ljg8,9651
372
- helm/benchmark/scenarios/commonsense_scenario.py,sha256=yZ6n9aqOi7UWY3q4uTDNc2JRNZxaBZPIp7n_Snt_8g8,9511
373
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=gKEwqHDD8KlKmW8z3xAxSIGmALTXrRRPcoDUzbv_IXg,3854
374
- helm/benchmark/scenarios/copyright_scenario.py,sha256=FHzUYEabj-BTKl90fgq7jSCq5_Yf9cO9MA9djn50B1Q,3697
367
+ helm/benchmark/scenarios/commonsense_scenario.py,sha256=VN6nNZZpz9a1IC-tW5MvqztaW71f2zsV8lq-A34p3iE,10696
368
+ helm/benchmark/scenarios/compositional_instructions.yaml,sha256=mPsFzPU6uaAD0xghzv-QD5Wk4uhoLY2sF3Fw_lunAsI,1822
369
+ helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=sR3UzObloLUzgjNwTbSHLGGkeA0g9-Aq_utpBPT2u_4,4757
370
+ helm/benchmark/scenarios/copyright_scenario.py,sha256=GWRCJdLlnWZcz6ztB5XIASGMPNd2o8EZNR2GueP8xuc,5035
375
371
  helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
376
- helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=pfHAteKXLNUrhKyAYk6m9j-d7iuEgz58o15xukp_GFQ,10260
372
+ helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=FM6ty-JSFTDqdKLzfwgfhl3zV2oh_DWjRw4qI4-IrI0,11169
377
373
  helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
378
374
  helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=ZBfkUYlIa-BagRVBf97RoyLfEloAjnM0RPv5wmEWueQ,4406
379
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=vOUE5-rj_Wr6m7n76knte-kCMsphb-SSq9LraYf-Dh8,7933
380
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=9qo3l44aby1EfQqohh1M2DVtHXqY1fuvj1XT3_n4hBk,5588
381
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENOvcNz688P_f3Y7NzdwiWgoYTNvAaw3A,2866
382
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
383
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
384
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=zaXn4sRPUEZiqPoudiDT1xHMV2DaiEXOOTz3qB5q7Go,20143
385
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=NjutVTOVVze-IJniRFecz8gqh_BUpuJG3-BUboTGKRw,2933
386
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=EYKoXDWMesbY5dCNY-N0eYMRL0rjEfGsuS_TkeD3Suo,2952
375
+ helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=pZK3dbKKNfNOHvNaGMkN9pjFznu4raNyLe4fWkxNHSo,8604
376
+ helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=hBKRRYIHegOrhIo_i7-1RPtbxmuhXcg29DkUIep0x_o,6304
377
+ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=KzBz8nkrvPUTw5WmEoivtl0lLJ-mORek-IVKYmct2Pk,3460
378
+ helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=OvJ3pfxbxtJRxeSfeK-uoYFZ4ZIDSqE7ZbqZBuO93DE,14743
379
+ helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=zWhQWEE9Aa1O9ASLE5IAw55lzNLJ1ifGsBKZFk-jiXM,8942
380
+ helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=XO--1Rxb6kyLDRUQw-GhzLG-aTagVyN7ktWriAbBTAE,20756
381
+ helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=vIkAgy4LysSSIm553bnts3CEN6NVIDKr3xeGkZ2GNyk,3520
382
+ helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=5l0lRRNNJ8nAb1R4bMxq3lakMF-P3XFvVpnT1PrwMms,3556
387
383
  helm/benchmark/scenarios/dialogue_scenarios.py,sha256=yXCMZegzlgL0CXTY1W5lXdkFFHicUvq4z7_284MfRpw,5778
388
- helm/benchmark/scenarios/dischargeme_scenario.py,sha256=rBzagg0JVVN3o0VUfmHy2cN7gutV_RAJAo5Fa_El0GY,7842
389
- helm/benchmark/scenarios/disinformation_scenario.py,sha256=0T7LhXguzBP645Fruc2udfTaMuy7XGtOEMJKpFMIFRk,8565
390
- helm/benchmark/scenarios/dyck_language_scenario.py,sha256=hygFPTcICGUEPwjtxULLKBSbuBOXLYpozIgiGcT__W0,9379
384
+ helm/benchmark/scenarios/dischargeme_scenario.py,sha256=WTlqFnM76DFVGOUSLWv-g--vHWR71UWZ9VFXoEec3fo,9026
385
+ helm/benchmark/scenarios/disinformation_scenario.py,sha256=lq9Aj-DDpPJeFVk99wXEd2Qv3kahiBe9c8-RoBieCDM,9581
386
+ helm/benchmark/scenarios/dyck_language_scenario.py,sha256=HZEXetj5BkXrNJbAvg9HidrkxDgi2UUGIAVphNiN-jg,10052
391
387
  helm/benchmark/scenarios/echr_judgment_classification_scenario.py,sha256=IqODoUY1-zJD1KW4Qkg3VwJcUeeLgGUKThr62bW-wx8,4915
392
388
  helm/benchmark/scenarios/ehr_sql_scenario.py,sha256=Gm7Kw_TSUUxHW8ns-2e4E_tTBVX7h6Ta273VOpkMCQ8,5480
393
- helm/benchmark/scenarios/ehrshot_scenario.py,sha256=MWcTejCtwohBPbZYWei_WNZ-Hdnhml7ovTVbJAgUetU,67770
394
- helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=sxYXKvf-mGNqctTkemwI9rrA_Rg2xA8mz3_W3TIfzUE,2147
395
- helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4V426oOuexGg59q0djHCTQjQmqYgyLT191Z5fayubmU,6681
396
- helm/benchmark/scenarios/entity_matching_scenario.py,sha256=kzzDaoVikL2P7Z-17EkLIVR_W7IHcNVerUts2oXDKLA,7111
389
+ helm/benchmark/scenarios/ehrshot_scenario.py,sha256=OzZrgi-UZrMH70ZnHSeUWPCOesUue5vxPqnNOaN45dE,68830
390
+ helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=gceJqjxX-RxvOqPDANEwOrbHwKxtddpMz-FcsBfby0k,2854
391
+ helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=03Ju45Sju2r4A_Peq2EsOyg5Ik99lMUv-6X--ejB9fk,7332
392
+ helm/benchmark/scenarios/entity_matching_scenario.py,sha256=83F017FPFED_106IOawJN1jdY6IfREGJPNRvCokKGNk,7761
397
393
  helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
398
394
  helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
399
395
  helm/benchmark/scenarios/exams_multilingual_scenario.py,sha256=c9zMGGL8EbCeNogTm-88g_5wWUiX1Zr7z_tsyjUq2h0,5404
400
396
  helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
401
397
  helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
402
- helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=dMTfI9MRHKXnECsXOIY8xvX6w5vAPEIa6A7TYyIu2Fw,4457
403
- helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=-O4ilLwNcycmpQG5h_5WtQP7yJEr4mjWjKBe2eNP0uY,4806
404
- helm/benchmark/scenarios/gpqa_scenario.py,sha256=369E0JvaR12EcgcEFKKRcDw1iztt4sb8ghIsk9Brzi4,2884
398
+ helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=I7eoymZfxu4gky3YjyLnZgaFIJcMkprxQxiCLM7wJV8,5455
399
+ helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=Qw8OJzvp12716GRW5kIxxX--f92OFRcaP0oEy-gakjM,5674
400
+ helm/benchmark/scenarios/gpqa_scenario.py,sha256=MsMsBqgxz6jKt2-ys98XAslGWkxZgzpYOws0b9e4Uj8,3520
405
401
  helm/benchmark/scenarios/grammar.py,sha256=58tQYKPj013V9jIpW7fXUqZBLuboqEi_WLlDjx74spM,5590
406
- helm/benchmark/scenarios/grammar_scenario.py,sha256=Hz59gp5ivH3tIP5UAcHZbnk8pBX6GhIABSQlG33gIRI,1502
407
- helm/benchmark/scenarios/gsm_scenario.py,sha256=QIj0QK5ncF31ES0GUlxbdBk6SIiJJnj5wzamj0do0tQ,2674
402
+ helm/benchmark/scenarios/grammar_scenario.py,sha256=c3ATPkHM0WkA9QZEf2VNfThhuEUXD448uOuW6CAeVFw,2309
403
+ helm/benchmark/scenarios/gsm_scenario.py,sha256=S_rD8uZsajgqyaJGNMpqYvshYYIW9hMV9N2udbI1Ax8,3405
408
404
  helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=8_ShEuOoEGu7iRE2b0tgi-cfBrCPF9k1L-Pgb__n3Bg,2005
409
405
  helm/benchmark/scenarios/harm_bench_scenario.py,sha256=CBo_AfbtHTlvJdsiquP0EDTKApVmDZc7EW0VTENNAfQ,2478
410
- helm/benchmark/scenarios/headqa_scenario.py,sha256=m6Kqt16JeqA1-OLJvmBPZzhVOVt7O6rbJGAwG9C7FZs,5658
406
+ helm/benchmark/scenarios/headqa_scenario.py,sha256=0hJewHkF9IKQfW6NUJ0DPjlwQmr7N90a2eSXrBQiFNA,6635
411
407
  helm/benchmark/scenarios/healthqa_br_scenario.py,sha256=YneXTfp8V6k8rYCF3BTX6bxN2ASxdG3qrBr7uH_IFWc,3406
412
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=iv1khpdiWW0Z7lshyWOhhjRfYFdAU6etN8X5EDEQCrc,1302
413
- helm/benchmark/scenarios/ice_scenario.py,sha256=NCbeqvpDFIIG7kSCrJrS-Z9S3iG2THZ7HpAqghpi_y4,16725
408
+ helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=5R9En7lTNirZCVsMNqNB2metw0dIEPa9usoFB9W11B4,1855
409
+ helm/benchmark/scenarios/ice_scenario.py,sha256=tEkXqRtvtXaoC6JfbJOcY0E8xWyYKGMOvsSYJGjM_9Q,17674
414
410
  helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
415
- helm/benchmark/scenarios/ifeval_scenario.py,sha256=SYn9itpFG0tlWSayf6v0P8bRgdtc-BmOV1dF-4TEm-0,1675
411
+ helm/benchmark/scenarios/ifeval_scenario.py,sha256=v2Q1uYCd5i1jO4_gcIlTrbZdPZ27tJrCXi9e0sqcm8s,2308
416
412
  helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
417
- helm/benchmark/scenarios/imdb_scenario.py,sha256=qHXd-QIXTCBq8rWW3N5I2Rvg6Pz9v1zFhZkwc73w9io,6259
413
+ helm/benchmark/scenarios/imdb_scenario.py,sha256=H9iHmKK-q4a5edSMcS166f1fjkNbOS5BEIgR3md3k7M,6887
418
414
  helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
419
415
  helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=JRTLaQc3PDpYeX9ewGnBteT9jXeaGbmJ1VzYGT8TsXI,3067
420
416
  helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py,sha256=5fJHFonb7Ko7exHFtoUtvHar_7PhK2HjW9uDlU8Ljj0,2872
421
417
  helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=6z3VlcucrwK2B30artWiSpo-mOTr9tiwYV6Fu8XD0VY,2657
422
418
  helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
423
- helm/benchmark/scenarios/koala_scenario.py,sha256=A5M6SD7Jjg7r9QlbHCtMaydBe-wpOtB6oc6gFXuZ47o,1389
424
- helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=23rZM3IA-phf2VnuPY9QWd64scE6eaJks49apDUNfic,6355
425
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=xjw3iKRf8P50Wo58n7ssnFiWHR2QFehzHlZhh9P1XKs,5374
419
+ helm/benchmark/scenarios/koala_scenario.py,sha256=h-dTHQrNVoi7p7sTXZDqWcpjlznfUgxNrgr4nW8Hrk0,2212
420
+ helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=DE8efUmcPW5R62tZ46Rdsjv-EQs4lXm403O5XxM9heQ,7303
421
+ helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=JTm4Zkwqed7PijdeHzSbQ2l4YZY037OYF_fbnKmlpKg,6185
426
422
  helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
427
- helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=BFK524H7uLfz_ZURuRS7KrhzRCP-WyhIcOgdcBrsldA,8709
428
- helm/benchmark/scenarios/legal_support_scenario.py,sha256=Ty93M8yTr_VEHomJ_36htqqBDZZKWI7PHtXA4qkSJGk,3990
429
- helm/benchmark/scenarios/legalbench_scenario.py,sha256=Yfyouxb4ir16zlBea77Xzjc0BlcYPkiXoBoVI38FXwM,4779
430
- helm/benchmark/scenarios/lex_glue_scenario.py,sha256=-3fsSjTXjgRN96Hl4GzDIMB_dlxSR9NR0ATUb-CiU3w,10357
431
- helm/benchmark/scenarios/lextreme_scenario.py,sha256=gVTHoMYX6Q_Itt5rOVO5lYmqWfAtuuf63CnKAF8b_ak,20461
423
+ helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=0DraJdQebbl8tv7S3WmLos98wnQFGJOzY6suGPoxR40,10954
424
+ helm/benchmark/scenarios/legal_support_scenario.py,sha256=cM98PnIAfjQzciUYGtgHqHYnWIdbdJfh3uy6uEIo488,4567
425
+ helm/benchmark/scenarios/legalbench_scenario.py,sha256=K_KjDH3Rk1AM6pXLRedo-6o2rEw9OIk3porlCr3IGvQ,5623
426
+ helm/benchmark/scenarios/lex_glue_scenario.py,sha256=H7f3F7gK7bgf6FXvqXGTQrecTE6RtZaitIKmwQLksck,10736
427
+ helm/benchmark/scenarios/lextreme_scenario.py,sha256=dR5UUIymth3J3RInoNybygZg0rNZ-8wwzVHneuTTOGE,20843
432
428
  helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
433
429
  helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNySpna7aTY7K7RPD2x4,9109
434
430
  helm/benchmark/scenarios/lmkt_scenarios.py,sha256=K51CdOZqMOMOozUmADjrJuNCpUtXVEZwcOeIY-EZrwM,11162
435
- helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=2VUJ36vHUZp6fZuLfRIuPSsU_K6Z3Im2ums06sZENqo,6153
436
- helm/benchmark/scenarios/math_scenario.py,sha256=tW-nGKxyDOwOo2siqu1ZzPrCGzw_lFYGK5uiUK0lF7A,14525
431
+ helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=ZtheFEcsBMSqGIPw5UPOO_b3v93mPFar1yqxVnsLq4E,6785
432
+ helm/benchmark/scenarios/madinah_qa_scenario.py,sha256=W7YEQTHyNWUJD8sKFmXU9e-ubzvleWQs7Cj_1zdq2bk,2482
433
+ helm/benchmark/scenarios/math_scenario.py,sha256=p9tsdNsiYFtuG89cMByZYn60QjWzEsnCO21OHPr4DJo,16034
434
+ helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py,sha256=Gtc9DgV2bLPIDngROmizTWQHbTftnwVodi9CYT0_P2A,2146
437
435
  helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
438
- helm/benchmark/scenarios/med_dialog_scenario.py,sha256=AE10W1UWhOrgKUnz7e2brKSaQR1WJkQUcPoo4s6n0Fs,7553
439
- helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=XEipvuIA-QoyZrtlm8nnaPuyZzdDaeTskAhnseD3Q68,5096
436
+ helm/benchmark/scenarios/med_dialog_scenario.py,sha256=MKDlZLJEUq1nDRzlkHlpTWOxHwgghWMXcQvHJcM2LP0,8615
437
+ helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=tvF6d6e4WQi_mUIlZoLQvbOpVIfHR4nyMVVR8z4AkAE,5752
440
438
  helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
441
- helm/benchmark/scenarios/med_qa_scenario.py,sha256=m0W-FgFi58psLglZyQy_ouMQIDP-2j3aL7uInkdVtms,4478
442
- helm/benchmark/scenarios/medalign_scenario.py,sha256=mhd8REXpPwxftH48-KKb0ZURJ1mdOlvPRmvN4g4M9Ho,3383
439
+ helm/benchmark/scenarios/med_qa_scenario.py,sha256=uW8FOEQhMw6k0WF_LKlH0oFTQVS9D_9MHXvVTNwDC7k,5140
440
+ helm/benchmark/scenarios/medalign_scenario.py,sha256=5ALak5Hq2XQbqwTF3fQYKg-QPtL_vjY7J1UsMm9SOFk,4481
443
441
  helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=fKXJFVLGnLcZKRBLsbjJA6YA4WqMaQAjkEU-i6YzSTQ,11626
444
- helm/benchmark/scenarios/medbullets_scenario.py,sha256=8O0UsPWw-ESkrgiuWz4f8gR99jH5-wS5HtCKYwZ1ycs,6713
445
- helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=vwmEQZ119tOVeZtl6Zt-nXKwkA8Qt4WRiH2HogIkV0w,5560
446
- helm/benchmark/scenarios/medec_scenario.py,sha256=Lo7iVkek7C9omJ5LX-C83pA_Q5OrAfdNhJY4rslJWTQ,5270
447
- helm/benchmark/scenarios/medhallu_scenario.py,sha256=d4HlEi1cQtvh1a39jvIHezDDmjuIEsSPdqDLLkDTzw4,2544
448
- helm/benchmark/scenarios/medi_qa_scenario.py,sha256=FmXI3UwfbL8zinFPtSyTyw4X5VIe2d32HAg93vbXR94,4118
449
- helm/benchmark/scenarios/medication_qa_scenario.py,sha256=StQmfHTYi8pZLP9FMPzyS-VB9gilZS0XBme7MzAL2QA,2583
442
+ helm/benchmark/scenarios/medbullets_scenario.py,sha256=oMqnF3Ri9dghEWpGQYzfcTnYGMK5b2cJNVpJoqdtdUo,7694
443
+ helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=EDeeBKmbosUaMo3dg2MNVs_Cb_ws6WfnBYk15_B3lkU,6608
444
+ helm/benchmark/scenarios/medec_scenario.py,sha256=sLx6tcFXcvhDIThGNVi-425znECAn5pkUgRk83CM-Q8,6343
445
+ helm/benchmark/scenarios/medhallu_scenario.py,sha256=0EgeIxGuYMyBzM8xIOF4WcxfCOVqCp-oOuZe4Ai-CRM,3660
446
+ helm/benchmark/scenarios/medhelm_configurable_scenario.py,sha256=vxvvAaIFW4cWaMez1xbEOZBh6S2wEH6Ws8KcGpnaZbs,3852
447
+ helm/benchmark/scenarios/medi_qa_scenario.py,sha256=KXHQIliik9Cihaw2_M6GW5QdmHBeGoPc-0tnTw-_M5w,5224
448
+ helm/benchmark/scenarios/medication_qa_scenario.py,sha256=uyYxtCm_dX9Jt6X-3ha2gAUyxF55wKn3_k95g7VAzHQ,3636
450
449
  helm/benchmark/scenarios/melt_ir_scenario.py,sha256=d88DEGKVJZCeGnbrXrQZO_W4VJeqW8XNaYc8wIUiJtA,5978
451
450
  helm/benchmark/scenarios/melt_knowledge_scenario.py,sha256=FDG4OGYEV6Ac40VC7KAeikzbFKAK2XXFhH1-QUTw8jo,7923
452
451
  helm/benchmark/scenarios/melt_lm_scenarios.py,sha256=kSm0lRRixhnXctMprPnzi09PLOmgfs-C7TAW3QI8RmE,8969
@@ -454,60 +453,63 @@ helm/benchmark/scenarios/melt_scenarios.py,sha256=_WShDpmPaKrujGbZcazCqleDn0TKDh
454
453
  helm/benchmark/scenarios/melt_srn_scenario.py,sha256=EQSOZIXbfvVWCJMJ4H2e_CiBz6wc8THJndnbK2WwTHM,14674
455
454
  helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py,sha256=ptMQWgNn6R-XpAVAAjutSdZg_9ZUqG6fVotzAgeead4,7945
456
455
  helm/benchmark/scenarios/melt_translation_scenario.py,sha256=j9YrY60DQHZz4m1MJZaGLzyI6FERlHRx2wy9auyAVB8,5415
457
- helm/benchmark/scenarios/mental_health_scenario.py,sha256=O1Lfd0MxqawLZLKUDSynaqqbaGHRjDglmePIqepnJI4,4961
458
- helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=GSUlvK-NVPYB83emucc1cPj-HgAQVu2aXGuutfXJUHc,4098
459
- helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=bxwVWjE_z4I_Nk5eD78g3QAGyjpsNg7DVWpkp8IGWXM,3841
460
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=tZBUZEaUMZvfSlsU6hcPs-pxQ0kDIL6qebGd7JmpDbk,2699
456
+ helm/benchmark/scenarios/mental_health_scenario.py,sha256=dwirS093vIdS1VG5yKqUw863TJoCF_keO-pr7ysTIxA,6066
457
+ helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=OIDB-f8wyn0ApsPqwpP11yJEpEtSpf3aYc6VVap6Jr8,5275
458
+ helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=pG_NK1Et0QZosQAOLAxbciyNSq_wIdOT7hkXsBb4mTg,4902
459
+ helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=KRl1lYX-ITWTGxWS_NNQ0o3I4E__jlzNDhAYvI1by7g,3749
461
460
  helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
462
- helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=pwpp0wqNhsGc8v2V11aUyEWbwdkmIm-42N676j1T3Ws,4031
463
- helm/benchmark/scenarios/mmlu_scenario.py,sha256=_5cX2uI7CxD7K_GvO3MD8CRJLuN4EzS2o_EFvbrfjSU,3855
461
+ helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=2FVL-6Umn0BufFpJ0e405q1ZgeeP8Np1kCvsE61GaOE,4686
462
+ helm/benchmark/scenarios/mmlu_scenario.py,sha256=P68i3gBlvVwjItZhLimtM6-zVGv3cYitSPH8ARwnkEk,4610
464
463
  helm/benchmark/scenarios/mmmlu_scenario.py,sha256=CyOISLOsXF9IEYGfeqWyYYkWGvrUvGivlWSJ5ttN9qY,2762
465
- helm/benchmark/scenarios/msmarco_scenario.py,sha256=-l7_rIMQjMWcpTyn6dGotmNJ5XxN_Ze8dEJyv5ftWFA,34050
466
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=13pXjs9lFduM-QL03mpM10hU0iA8Vr2jJG2FVBQdKOI,5577
467
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=RlyWrlI9e5MLsGbkQWpO2WRsIOZJi39xHskOIBypHdo,5399
468
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=-Et7hJnQJOGl1U9Xdb5mLckYTpU_Ve1sCe450M-5haw,13513
469
- helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=MiSq0UnUllJxHFU2gO7m4vr_vmulavJxc4ruZhsAt2U,5632
470
- helm/benchmark/scenarios/natural_qa_scenario.py,sha256=g-fP8L1lXs7zwNVQOc0ZUnbYkCyElQtLVt5fe5dtvSE,12564
464
+ helm/benchmark/scenarios/msmarco_scenario.py,sha256=p9YNL5oTa9isCGVvmqHHVofKmiwitjPQd28ElXmRAN4,35601
465
+ helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=gtVSZxrs321tOolyD0gOoLzc0--uTc--3_HdlBVIuHo,6607
466
+ helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=FIdI509nn0LN9opC4yJ8UsvWmh6-KECUMZF88duIEq0,6395
467
+ helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=o7RydazvQkYK90epvuXsdEyE02fmpsDEwS6253fNptk,14365
468
+ helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=XBGq3_gz1vaMhVX17RWF7mhXaSlKsv-_-JWCyHDkGWA,6428
469
+ helm/benchmark/scenarios/natural_qa_scenario.py,sha256=3wkXvYm7m0Isxv2EW6SIuIEwZEV2lihsSLQZaANsKZo,14017
471
470
  helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
472
471
  helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
473
- helm/benchmark/scenarios/omni_math_scenario.py,sha256=5qb2cO-Ibb3kDbwYvkzsoU_aOsoKV3ROLgZbi83OyGU,1955
474
- helm/benchmark/scenarios/open_assistant_scenario.py,sha256=zd8T6eLOlYMZiFyKrRjc-EPwk5_KpbBedAcKDbZ-TdI,5609
472
+ helm/benchmark/scenarios/omni_math_scenario.py,sha256=nB2miRRQ-cWwhpqUkypOZibYugD56wZ299nxE5bty9Q,2582
473
+ helm/benchmark/scenarios/open_assistant_scenario.py,sha256=Z9eyaaHGRtFZTogIkOe1Pr6d70lqSe80tMsNPWR_jog,6577
475
474
  helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=XbO8Wpjjq2e8OsC2s_ZScV4TcZg3hlpVGy56hgxXY9w,3253
476
475
  helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
477
- helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=73D9D_q4Zw904qfd3tVPPhHxpGN4IZcWjlA6ZHEfp2s,8070
478
- helm/benchmark/scenarios/quac_scenario.py,sha256=RpJpOPbvhB0jv3R91Odc20LcNyZsny9J4IF24GNEygQ,6689
479
- helm/benchmark/scenarios/race_based_med_scenario.py,sha256=vZB43jtM47PWrl9L4HYOf1i7orpscKcHX01m0oVmk2g,5778
480
- helm/benchmark/scenarios/raft_scenario.py,sha256=Yk56dUMqDGXpp6SxoGWhyxa4lAIniSQfivjkoPqMuFA,4644
481
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=zpQthgDi-AyEgOUFO5F0qaWCctLEI5WGHBEGlPEVpqc,2424
476
+ helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=Z8gR19kiTIugBTvBj6g9LiBXicfAxZ1AFh_GF_axgQc,9043
477
+ helm/benchmark/scenarios/quac_scenario.py,sha256=y5bm1LXHIICqPIkWOg3sibnH_sC15b2zYUfT-_Y0V4E,7349
478
+ helm/benchmark/scenarios/race_based_med_scenario.py,sha256=pyeOUjWlQ30WgNr48BuV7kP7fhqZljLfizbTfWjyV_k,6862
479
+ helm/benchmark/scenarios/raft_scenario.py,sha256=BQ-faIiWBuUYmHTMCRbI8XpymtWvKK8DN6oNejjNi7M,5443
480
+ helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=USsjBVzoL-Bgq8B2clQvl3d-g4XlOlt8gvBje9VD7Dk,3077
482
481
  helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=jgVf1D4eTSxwxQsW0GBou5hfSo2dnlEJvHpVJqk3BxM,6327
483
482
  helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=Dy0INRMzxSiIs9Pm3fa0hYodN-W--WPSv4kcmeQhucM,3270
484
- helm/benchmark/scenarios/scenario.py,sha256=kSy7tmtFeC6-QSEsBuvlrMTA1PB6fOY9jycMld-vBVM,8592
485
- helm/benchmark/scenarios/seahelm_scenario.py,sha256=i8SnuYDQtFGFkm686h3_FF9J3vr-Bd9w_jd7h5tV5yY,78152
486
- helm/benchmark/scenarios/self_instruct_scenario.py,sha256=3Kvi3pLL6eGOEezjoQoGv9c1UxKiRVlFmILKzqst4pI,2309
487
- helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=wF_sD61IZ4RDznBVQ1HYbGh3Vc2qjbcBuU0jdmp1aD8,2803
488
- helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=5aVEiRgFCutEWW9yMcJBxEo11FlwW0SiZTaOyXY6ioc,2693
489
- helm/benchmark/scenarios/shc_conf_scenario.py,sha256=3LDB2pT6yi-ubSooGAD_0Ao7sYLo_MMAHNfm5Ux9Yvk,2889
490
- helm/benchmark/scenarios/shc_ent_scenario.py,sha256=PS_O_keZ5s5_nSKxAC1k_WV2W8umEbyyKmlFtxvaReI,2855
491
- helm/benchmark/scenarios/shc_gip_scenario.py,sha256=cxMpMmS05QpZ4xW2eogPH1hcDv6GzA6UQoAi9OSFO_Q,2702
492
- helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=dbQI_pDqXepV6EyxMUNumIpyQ8oDwnu37qyQ29rxZfY,2998
493
- helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=edepzg5qrN_GKa7u1W0RRhkpmfUi2vFHCvI1ma205WQ,2908
494
- helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=QOQdz21s_YaRyGz-ciCPHH-fCy6hiGIrHUZz0SWPm5o,3391
483
+ helm/benchmark/scenarios/scenario.py,sha256=6zYT0heGPh1HXmv9l2g360Y3CwcV4xjA6jUq5snNF5I,9482
484
+ helm/benchmark/scenarios/seahelm_scenario.py,sha256=Pgw05ZT9NByV7GL0031vGImbhGOZPrHv8aOR5DmP7sA,94098
485
+ helm/benchmark/scenarios/self_instruct_scenario.py,sha256=uPVclF96zh0P2VJ163nLa7XuTKlMKGaTDFN-6IcdbXQ,3164
486
+ helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=kLnoSmpNaPKUcHDPhS6sDP38TC0YII5dlvEKpiUZYKY,3787
487
+ helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=Fg6PKKLLtmVxuu8pTOAmmoRpPIlFhxWl4VzIUNr7w6Y,3519
488
+ helm/benchmark/scenarios/shc_conf_scenario.py,sha256=605KB8lTHlJh44XwbkilKXXAfJQGD2XVnZJmFoaV4Vw,3948
489
+ helm/benchmark/scenarios/shc_ent_scenario.py,sha256=Sr4E3z0keK69b0DIZ1QFISvG0TsEQ6S567h84eSEHcc,3737
490
+ helm/benchmark/scenarios/shc_gip_scenario.py,sha256=MhQ4mdKMJOtcZJ0gKxoVCg2RVyM8OKfjW_EA3wna_2c,3564
491
+ helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=OTYdD5mifaEZeI84RF5fz3Q10M8cE74H0GR3a7QisAE,3974
492
+ helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=bM_qSCv5Qp_03TiDezgl1gUSSs49IZ_M1L4xZnMzToc,3915
493
+ helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=BttMbH39uai4qg621W0ySAFX-UtoRLuyEi-f4bfSrFo,4461
495
494
  helm/benchmark/scenarios/shc_sei_scenario.py,sha256=pTcb7n97VkesyRuqUqe5JGed1jDsQEd19udciDras8E,4532
496
- helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=vjDyRZXP9UjkQzmA6u7SmKtMBuUwwn6KRQ4rT3vZqqc,2796
495
+ helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=IPOuJ74AIWOLDVIQ5lNUjMswcU9zeB_gOXg-K9HLTO4,3703
497
496
  helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=sjIHT5NZlHv_IcXr_15-pOiBUPKKwykyH-QpMfvrHAY,1247
498
497
  helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
498
+ helm/benchmark/scenarios/situation_prompts.yaml,sha256=nJA3X_I67PIpXgd7LTekWwEr5zn1ryqIHgvqCpAwoGQ,1790
499
499
  helm/benchmark/scenarios/spider_scenario.py,sha256=mhiV3XWGwpnIQkaHFM_rvZlrwE7nqS12-F9t1eB8kdI,3306
500
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=zdokiMy2Lrg5mS3V2QEakcZyJxIkqcoT5CqVCAtyoKU,4146
501
- helm/benchmark/scenarios/summarization_scenario.py,sha256=WZnqhMQED6UBmRjHSboygdenLecOqIhvgdYVXzy6Q-I,6912
500
+ helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=ZiXGXeKelEm9NrFsHQS5ft1L4oL6a_IlAJm_flRv-Z4,5228
501
+ helm/benchmark/scenarios/summarization_scenario.py,sha256=wry6hAO_YXk56gS79jJ6HP6VhrRjpExvEZSsl2vM910,8883
502
502
  helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
503
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=OaxEvT1H9VjOjBSw_yKs3dcYt33vFE_UARr-UIP9pBY,3120
504
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=pt2Aln_dX1YMSl-9hV1HJmwW90MC3fWwGsMxZg-Q-UY,16391
503
+ helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=HbCeVUzPm3miSZoIDivTcAkP-fwi6X4TnyaAx0jUumk,3737
504
+ helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=Ir8CVC0aD7Cy7H_ZKGMd1c0iLK-dWbkuMuUl2D7kcR4,17048
505
505
  helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=7STCSHiHGIQ2aaN_PwDE5jXUJ-qcu8PaS4pC-pbOceE,8410
506
506
  helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
507
507
  helm/benchmark/scenarios/test_alghafa_scenario.py,sha256=ARQyzjmEpX_FpN2QLnIB7P-ToAeMtE4dqsolzlq8KPQ,1696
508
- helm/benchmark/scenarios/test_aratrust_scenario.py,sha256=3rsIBfFCAmemNT_IJQ6RZ5liwrHseKGvtGmm2VHoBo0,904
508
+ helm/benchmark/scenarios/test_alrage_scenario.py,sha256=9ofFc05Sy1mdfU9VgHdL_SNp8olJ4ComnZllkMU6itU,6711
509
+ helm/benchmark/scenarios/test_arabic_exams_scenario.py,sha256=nD221WpNE3Ddy-VOdLQGYOHiYVBAcyJxeMc__lVNRLo,985
510
+ helm/benchmark/scenarios/test_aratrust_scenario.py,sha256=6Ks4DA13gU4BAP46qKwPISkqIJw-RiZt4ZhyviXdrUY,918
509
511
  helm/benchmark/scenarios/test_bigcodebench_scenario.py,sha256=q9FWJsxLJoFaB3PSMLjI_-YyPoZYusOsMPwn6X6NKXw,1304
510
- helm/benchmark/scenarios/test_bluex_scenario.py,sha256=fLTyMTmSiJ8MoJKYJ2pcE39yXwZm6zv3_oWsQbRbwH0,1930
512
+ helm/benchmark/scenarios/test_bluex_scenario.py,sha256=QCIqq0GRRrjb55lwLpBiEkDwSFzEZxBKbCQHvyYO_Fk,1928
511
513
  helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
512
514
  helm/benchmark/scenarios/test_czech_bank_qa_scenario.py,sha256=bZNLEGu58iHmutGlSp-2uVC2931TO6Rxw7giqFh9RHY,828
513
515
  helm/benchmark/scenarios/test_enem_challenge_scenario.py,sha256=XfPkYaSwdGa63ToC_BLuVKTRSldWNBlKsZYK6CFzL3w,2000
@@ -538,18 +540,18 @@ helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-F
538
540
  helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
539
541
  helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
540
542
  helm/benchmark/scenarios/thai_exam_scenario.py,sha256=YjFsom1yiu-xBZ3SGenNuczVCwQcmyoITTMavGv-QEk,6069
541
- helm/benchmark/scenarios/the_pile_scenario.py,sha256=X3GWABiJ5cSoZzeNpgNUVAz7_A9SyM5MhgpJseKpZow,5019
542
- helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=kUQ-Bpu1N1s525EP3pa7v3sp9Wybl0RuJv2pVu0pAGQ,6155
543
+ helm/benchmark/scenarios/the_pile_scenario.py,sha256=Dz51JxxazqPiX_fk6viOav8hQ2n6Iw0LIPhouquu6aw,5632
544
+ helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=0U7q8E9XB0H9oSN3OzhfsiZ-8PJrYXCCC04dffjicB8,6822
543
545
  helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
544
- helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=wnP-zH38J62zmbdeOLzdU-E3iclbQPApgEk4AGyhdoo,2120
546
+ helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=ydG8MvBF3v6TXHScMK0_-HPAhmPhMWh5G4foBEHDp84,2905
545
547
  helm/benchmark/scenarios/unitxt_scenario.py,sha256=uL8Gni-Uw_eIp9xKQefp4J7XtKSttjJHzJE4USyoC2U,1930
546
548
  helm/benchmark/scenarios/verifiability_judgment_scenario.py,sha256=2iCJplnxdR7NNKjhsLR5o51pL55Q0bcbjjWlvrk5lw4,6067
547
- helm/benchmark/scenarios/vicuna_scenario.py,sha256=RFLUXx4zTfVPl5nT5j_DZ9TuHzk216PQcktomXqqR50,1685
548
- helm/benchmark/scenarios/wikifact_scenario.py,sha256=tTIHk7-xEsi-CGTobcEdbsjVrtAXTZOeWXRVj0hOeWA,5856
549
+ helm/benchmark/scenarios/vicuna_scenario.py,sha256=OWwbFkhgEMHd5YH2G3v2E_p22DmYmPVsDbKKhBbyTDY,2478
550
+ helm/benchmark/scenarios/wikifact_scenario.py,sha256=AHHZz_trtGf8HRoCnE6vukqrTD_Of9XQ1GcrqyctgR0,6702
549
551
  helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
550
- helm/benchmark/scenarios/wildbench_scenario.py,sha256=Qd9b1SC9ZtY1spf1vVuuFXXVxSJ0FlmR_DP7mIvAs8I,2981
552
+ helm/benchmark/scenarios/wildbench_scenario.py,sha256=dWJSqF06ZWAyZhaejNmrZ0Uu4Vlh5HMdTaMLNkMfe8Q,3668
551
553
  helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
552
- helm/benchmark/scenarios/wmt_14_scenario.py,sha256=1YYjz4x2RbYfJAXBTux9X30dxYTSC-YNngCCLhEiNfI,4646
554
+ helm/benchmark/scenarios/wmt_14_scenario.py,sha256=TNIYBXnbuvaOcpfmKqRZF6-yta1pTZSLA4Fd_XHhjCY,5159
553
555
  helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
554
556
  helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
555
557
  helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=NtTEHzmbeCicbjTRxPBUueZrBGOPwF6RVc2Yftc-VKs,5634
@@ -600,6 +602,8 @@ helm/benchmark/scenarios/image_generation/radiology_scenario.py,sha256=7JN8OYap8
600
602
  helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,sha256=DoabanZhd-2MHFDZeR9EoPit0T2TvbVwZGUR0RfJyW0,2362
601
603
  helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
602
604
  helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
605
+ helm/benchmark/scenarios/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
606
+ helm/benchmark/scenarios/medhelm/judges.yaml,sha256=g_O6zVgOMSL4_f1yNz8muDuUUBzcsM8e5gpfe56eI4Y,663
603
607
  helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
604
608
  helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
605
609
  helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
@@ -644,7 +648,7 @@ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,s
644
648
  helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
645
649
  helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
646
650
  helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
647
- helm/benchmark/static/schema_arabic.yaml,sha256=PoudK_u7hV2lalGRvYDI5b89tSfch6Dx_bn5681Um_c,7688
651
+ helm/benchmark/static/schema_arabic.yaml,sha256=Iui-4_M4tV45Xzs3bz0diI3UZwVAuaLAxD5uNhjurgs,8925
648
652
  helm/benchmark/static/schema_audio.yaml,sha256=lVslZX7JmFo0ZgLU4n6amrs9DK8y43Ux0I9QyDUG-14,29119
649
653
  helm/benchmark/static/schema_autobencher.yaml,sha256=yb-NkF5w5R2YOg7RIsadNHJ_5G7lG1gbcDVq_25luEk,5716
650
654
  helm/benchmark/static/schema_call_center.yaml,sha256=i30aFzWqdOJRyAHN8vAzyHEX1v95DEK0TI1SMKTN4TE,9106
@@ -662,13 +666,14 @@ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0
662
666
  helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
663
667
  helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
664
668
  helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
665
- helm/benchmark/static/schema_long_context.yaml,sha256=3YjlNkQBgp4hS4PE1EjZvjpvX9v4QjaBPALtOYLpPCs,11486
666
- helm/benchmark/static/schema_medhelm.yaml,sha256=84BrIengbq0m42ICWvyEWoYtdERR-8J8-8QbPOqUzvA,50747
669
+ helm/benchmark/static/schema_long_context.yaml,sha256=p01u7yPN75ZNmJhQodCRJo4q4Zb4vBieHKYqp4fD9Jg,11520
670
+ helm/benchmark/static/schema_medhelm.yaml,sha256=e3vVHdEXcS-joOUMUoIoFA3x9hEE__svDoajbjfqpLE,51793
667
671
  helm/benchmark/static/schema_melt.yaml,sha256=mmPqwDa26DVZXsRJkmKQSyD0OStvjlxaMoSPM25SpD4,47494
668
672
  helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
669
673
  helm/benchmark/static/schema_mmlu_winogrande_afr.yaml,sha256=YIVYf-mOFPq82UVBdMhnCWNOr4sV8Oi3-ozOszJ2tWQ,40143
670
674
  helm/benchmark/static/schema_safety.yaml,sha256=7RfZDX4wr8Xr1BJ149ZwmplPzPkNL0-BKbEZuzUsl_0,9278
671
675
  helm/benchmark/static/schema_seahelm.yaml,sha256=9XF9Rlr7I-g-uW6R0LNh7Xg52Xs3_058QybXEiN-hnM,28296
676
+ helm/benchmark/static/schema_slp.yaml,sha256=5AV2leKoSBZwP3rIfXcwiqqpXPQbyWjXKE5kU73IAt4,7122
672
677
  helm/benchmark/static/schema_slphelm.yaml,sha256=3avOfp-ZEmVRGei3_M_WX6cSP5hQjbfHsDr1XrjayMY,5294
673
678
  helm/benchmark/static/schema_social_audio.yaml,sha256=Nj3ORXDT4RHD52cyo1RHfueWwbhqp1qW06TaVJ2lUfE,8653
674
679
  helm/benchmark/static/schema_sql.yaml,sha256=8rRff6p_i1CsH7oDbUjau2qRWbLGspuM1Hy-g5pOQiU,6047
@@ -680,15 +685,15 @@ helm/benchmark/static/schema_vhelm.yaml,sha256=0slYep2eepUefgtK_m4iSS785sHdJzljm
680
685
  helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
681
686
  helm/benchmark/static/schema_video.yaml,sha256=FkpI5Slc4w-ty4hns82ArXIvTdqppWDnkJSpIp74QN4,9713
682
687
  helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
683
- helm/benchmark/static_build/index.html,sha256=hlkvPO8WVcvIJXentHj3Kn5Cd3QwOoi7OqRAou0pRVQ,1178
688
+ helm/benchmark/static_build/index.html,sha256=MRRycZym58h-5KW7aKyiqGxIpRB8DV5OHkND5JL5aDk,1178
684
689
  helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
685
690
  helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
686
691
  helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
687
692
  helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
688
693
  helm/benchmark/static_build/assets/helm-safety-2907a7b6.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
689
694
  helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
690
- helm/benchmark/static_build/assets/index-b9779128.css,sha256=uXeRKCUzQAC32ofNoaK3-WC7kRWR--KnR6--1m9NdQA,491471
691
- helm/benchmark/static_build/assets/index-e439d5e1.js,sha256=t7AnJSBjGs43kxIev2uLVumaInyBUxad9KVtvA86oUw,124597
695
+ helm/benchmark/static_build/assets/index-671a5e06.js,sha256=XEa85-IyP6ZeHfsWGoPno-Qj9pSxlnHsjLYmaqzdzqg,124954
696
+ helm/benchmark/static_build/assets/index-9352595e.css,sha256=k1JZXkXPFsUerOZ37oDhxjcb1ypOFEdDogJUP6H-NAQ,491553
692
697
  helm/benchmark/static_build/assets/medhelm-overview-eac29843.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
693
698
  helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
694
699
  helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
@@ -758,8 +763,9 @@ helm/clients/mistral_client.py,sha256=ceM8KLAcniAqK1BNVdUGzqy4av2SEEau6PVmPivxc0
758
763
  helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
759
764
  helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
760
765
  helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
761
- helm/clients/openai_client.py,sha256=prSASL8IE3XO0_CaAuZ45iYSITAMQIwtD2q7UqM4qjA,27803
762
- helm/clients/openai_responses_client.py,sha256=aixsZwO_swP4dhOhJPe1ZcOIav3rxmovPGY0Ug6s5ZA,7308
766
+ helm/clients/openai_client.py,sha256=4Q4LVMqvPo-37MV_BhsMydpwmMLfo-2kftRZH9lGtZs,28538
767
+ helm/clients/openai_responses_client.py,sha256=FhQcOcXNZc5AuDMh1KBD3ZoRdEREy73dIeFBjUg9YDo,8444
768
+ helm/clients/openrouter_client.py,sha256=oK8gXBhBs1y0AriZ9tVp8kx5lSY7gUgQJv-mfywSTfI,980
763
769
  helm/clients/palmyra_client.py,sha256=4AaZcV2tPHU4HJ9FWSkOY8_C9ndEckH3PH715QxJQ8E,7086
764
770
  helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
765
771
  helm/clients/reka_client.py,sha256=hA0tq3Hc9669q2sYa4Jr5yWy2NAbvoFDnVqQ6vds62w,8334
@@ -773,12 +779,13 @@ helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy
773
779
  helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
774
780
  helm/clients/test_client.py,sha256=T27UsIPWsbE1JK_8DN_DW9LkEcIGRbgDjio14YOIAb0,3854
775
781
  helm/clients/test_huggingface_client.py,sha256=8Shzrf1Pad1UsiUAdeOSqsTPQaay0CrWXmdNeIfrJ2Y,3418
782
+ helm/clients/test_openrouter_client.py,sha256=gCzchJMQZi4kkgtpGe1Ma0xF2nsP1uDevJcqbprZ6RE,2414
776
783
  helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
777
784
  helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
778
- helm/clients/together_client.py,sha256=ByImeitpWRhXpZ9U6c0Kol1D8X7Fxno5xgo6D7sZYOY,24201
785
+ helm/clients/together_client.py,sha256=kEa6z54zPWlcLHCb2g2PCxLRpdJ8aE9zvG5Yzkaeun4,25518
779
786
  helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
780
787
  helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
781
- helm/clients/vertexai_client.py,sha256=AxZRpZTRrzxwPs2xwKTgHH0eh7WEmHSS1ArTZwI_q3E,23268
788
+ helm/clients/vertexai_client.py,sha256=Qm-EkbpXnwiwZzB592-FPBuSlxKIkVH7tWBFFvOBvCY,23631
782
789
  helm/clients/vllm_client.py,sha256=xmXf35WX2oOZhpQnRxeooXGshENySOHZCUQ1E4pbQbA,2647
783
790
  helm/clients/vllm_granite_thinking_client.py,sha256=fds2i8LUG78OJYke1uYdDy6XRFqE3rZgSornFjzu4Sk,2172
784
791
  helm/clients/writer_client.py,sha256=flKLeMbFkyGfNmv1ozZGU4dxNy-QF5bFJF0mGHqpU3c,4467
@@ -790,6 +797,30 @@ helm/clients/audio_language/qwen2_5_omni_client.py,sha256=ftAVtOG0azvRQEcFjkSSBM
790
797
  helm/clients/audio_language/qwen2_audiolm_client.py,sha256=s9eH8fnVgw5xV39b_8AGt6IyNN3q9Uhcx6HZVxt7TM8,8981
791
798
  helm/clients/audio_language/qwen_audiolm_client.py,sha256=RvYweXANEyzhHYDx38H10F0ZEFaL8kj7n7TZ-UrRmZs,6338
792
799
  helm/clients/audio_language/test.py,sha256=FrKpirOwJW1__E2egq4VPgsTrgiSHZHBwfUCvxNjC0o,1969
800
+ helm/clients/audio_language/llama_omni/arguments.py,sha256=MxzZKE8sNsOe5eUse96gejOsmu_MfTJGiuOwR87xiSA,2334
801
+ helm/clients/audio_language/llama_omni/constants.py,sha256=IjFS9EUI5p1DLtGcX0B1lSxESkxcx5dMbuMkMm1UaSs,183
802
+ helm/clients/audio_language/llama_omni/conversation.py,sha256=SgoMEf1Roi_8ZxiIM6DXwY3ozw0ExOCYdFFX-5rRA0g,6881
803
+ helm/clients/audio_language/llama_omni/preprocess.py,sha256=2-YA6czgO1Zr-C1ChHvqVEfYa8qHhHp6n1Lb1Uw67qg,10764
804
+ helm/clients/audio_language/llama_omni/utils.py,sha256=GycpuTkNEZtMNG2ZTZ7cYVjPEilyC4o2itT9K9kwJFI,7556
805
+ helm/clients/audio_language/llama_omni/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
806
+ helm/clients/audio_language/llama_omni/model/builder.py,sha256=-y7amgUyPMEMknVutSSb_W3Zsm09r3K7u08jgEMinYA,3875
807
+ helm/clients/audio_language/llama_omni/model/omni_speech_arch.py,sha256=-Sgo9fEGHRBfkZrR63i3-uXZ19wkqYbGLqAiDqevRr0,11465
808
+ helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py,sha256=CqtEURdHlk6_29iM8WZnsmd7DMrUcnULGD2U2inWIxw,8426
809
+ helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py,sha256=ZmtQY7JT74O4OH78UYSuBnmxq5Hi4-86kEY5-svfU-M,4564
810
+ helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py,sha256=TwSVGfSOA5N82pB2_P4cElN7w_4w2XHBXr9qicluM2w,389
811
+ helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py,sha256=LF8znt1puoExQ87ovtoyc1-pzO9kWNqTu_CvUWr3nBE,965
812
+ helm/clients/audio_language/llama_omni/model/speech_generator/builder.py,sha256=nIjOSYgJTrdnqDvy5jnYjMcHvpOirAyvMNLuUbnL9pY,358
813
+ helm/clients/audio_language/llama_omni/model/speech_generator/generation.py,sha256=Rka7iVephHHj0z0mPPQLfe-3Tt_UsWbTI7VRevs1ek4,30644
814
+ helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py,sha256=mllXYemRl-laMRntRsKtak8SIWEbVfWk0EpxPqs-su0,4612
815
+ helm/clients/audio_language/llama_omni/model/speech_projector/builder.py,sha256=rmzWg4yZIfGpYD7VhfSrRNN7t5U4xNq8TVugq0KLYWc,372
816
+ helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py,sha256=naunMdDZXzK8VpASZJYsY6TwvuxQn3Uw9r_MUouUG5k,950
817
+ helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py,sha256=oIaVRv1KlFYPqbT1nPtATgTcVomfNvtHmxnIZ2wcTC4,19088
818
+ helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py,sha256=s08H7EY_TzHqVk1b6DZv_gI4VVwP_ub_FwF6JJu0z-c,180552
819
+ helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py,sha256=n8by91xA1xTYz8BfsbYAwCL5G0x1FuLhSGDAP33Qyyw,12216
820
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
821
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py,sha256=ZZ5I9X_p1-ttDbYsLBxImO_CxbC5LESLI8AAIe9kKv0,365
822
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py,sha256=VKATc5W9kl0fo9TuU19MaXYSObGxX2V2Fo1NlD4GC4I,2516
823
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py,sha256=TFvQvPiP0X8Zt-agQR84o75LUZp0uXDZAUqUl0vhPM8,14635
793
824
  helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
794
825
  helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
795
826
  helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
@@ -813,7 +844,7 @@ helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrH
813
844
  helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
814
845
  helm/clients/image_generation/cogview2/sr_pipeline/__init__.py,sha256=qWuNwKlcvGwEFcw5932wk_t0_baNwUILIJzQWJjgh2A,488
815
846
  helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py,sha256=1DwcUw9Tb563JpKpkPNIB5Ew1djozvPiGASShffiABk,3716
816
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=xYn3acxU4BRdDeRjk98Vj0qq8qqty93kPCLdz-bOMKs,10818
847
+ helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=IUTvHpIaaYrH00CQZZX9L45JMRb-twYir99K7LLnOzQ,10819
817
848
  helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py,sha256=OonYjdtNKJo12cNb-t-gFHLXRFxItCXjKgS9YxWAI-k,7718
818
849
  helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py,sha256=LSvAHRupsOqk3yb4GxyTsubRxrnPOEfObFym2j4eiKc,5120
819
850
  helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py,sha256=5D1QWyAcY0CpwITk7EBN6ylUtc7mvZaE9iHG628AqMQ,10390
@@ -837,7 +868,7 @@ helm/clients/image_generation/mindalle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
837
868
  helm/clients/image_generation/mindalle/models/__init__.py,sha256=1UieFJ0LGinYSB-idy3atl-gFAmS_ouiiGX6TM2Mh-I,8372
838
869
  helm/clients/image_generation/mindalle/models/tokenizer.py,sha256=NFFdLUhoxEkv9SZqU3QIFk0ukaCcn6w_xFWQIRGhZJ4,1190
839
870
  helm/clients/image_generation/mindalle/models/stage1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
840
- helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=cg5c6KShCMbMmHFuzDBIG-WgIBBkDrG9XvXC1DxqowA,11044
871
+ helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=Q-yZeB8ZIxwOdQaKpEeBVbwF9nXeQJ2xJhiD6KjqRi4,11046
841
872
  helm/clients/image_generation/mindalle/models/stage1/vqgan.py,sha256=KcarvKoMuPBpP0H8F8W67FogdvHaAQuo9jP3rFRxc5E,4035
842
873
  helm/clients/image_generation/mindalle/models/stage2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
843
874
  helm/clients/image_generation/mindalle/models/stage2/layers.py,sha256=LvDADun5nMaencaRT0pm-dq78xHpPPkpi8rlu7RLHco,5306
@@ -904,16 +935,16 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
904
935
  helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
905
936
  helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
906
937
  helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
907
- helm/config/model_deployments.yaml,sha256=sB3cV6io0NzUQXuKlA49-H3UzOEvWpFDP_MZ30gH0I0,171682
908
- helm/config/model_metadata.yaml,sha256=0Ps6WlsgElxOpCHVGiWu7QfS0o3Ls4zi1iuwC8PTUgE,269972
909
- helm/config/tokenizer_configs.yaml,sha256=Vq6MY2nplhYgiyLR98xCXBJWQgEpm64yenrskmkm2NI,40415
938
+ helm/config/model_deployments.yaml,sha256=JGM4eLHXv3KgndTu2ZqnMH5rwvoXNvKAoTAnmfZDs7A,174425
939
+ helm/config/model_metadata.yaml,sha256=8W9u04RugI_L6Kj3ipGqQlWLeXAd_FQwcw-2usKm5y4,274605
940
+ helm/config/tokenizer_configs.yaml,sha256=KZ6nReCV6AoActBoQYfi9BH4eGYkSx4OmSa2gzWh0uo,41039
910
941
  helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
911
942
  helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
912
943
  helm/proxy/cli.py,sha256=kEDoHpisFO0EJ0Wfm1FLpJdP9sXk9j8WCILEq42RKb0,8317
913
- helm/proxy/example_queries.py,sha256=EB2vVpAryOUAFiLrwsMiFz0zGl_UAQ8TJ9SkWngvsu4,4389
944
+ helm/proxy/example_queries.py,sha256=A4JKvLwkHQIprsgMFhGvruW1-Ud4YKNqwUWhv6iWfzw,4449
914
945
  helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
915
946
  helm/proxy/retry.py,sha256=o64BZsW2vwu2iewRA18wdsru2xC3eNBQ7WUw3IjC_5g,3698
916
- helm/proxy/server.py,sha256=Q4Mzts8mketktGVJ5AoOEA-_SGCue5QeOlK8dqPUuHI,10853
947
+ helm/proxy/server.py,sha256=PYG8oMb-lq8eGR3Kad2ZTudJxgY4QH4jVbyoOgjes7I,10904
917
948
  helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
918
949
  helm/proxy/test_retry.py,sha256=db0owyGTThmIMhYWU_Eh1U-AJvQ-Wa9j_kRmC9DNjOA,1059
919
950
  helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -933,9 +964,9 @@ helm/proxy/services/test_remote_service.py,sha256=xzkyptctXw3y5d1fgbidBMyw8B4rIL
933
964
  helm/proxy/services/test_service.py,sha256=oDYen-71iwZ6YMNBVbVSdEFsH6GMvZYw5tS5Eg4YHjY,8987
934
965
  helm/proxy/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
935
966
  helm/proxy/static/help.html,sha256=2Rn_lGZspqrZhNfLQ4wIAvYO_BK9q67Q_AS2-3WsMpY,6231
936
- helm/proxy/static/index.css,sha256=1OBOJ87LhwI2PtpoIyZoGQbSxQK2dz2vxk8BVmAybWY,717
967
+ helm/proxy/static/index.css,sha256=3z_JuWVuJFngWtHI4T5-EVyk4LyaCPDcSzlalvUYhmQ,754
937
968
  helm/proxy/static/index.html,sha256=nUJf_hwBPokqrm_hDZsVfHcJrnhZLYhkVSoLdGOocf8,2009
938
- helm/proxy/static/index.js,sha256=-OXgf2rUYI49vg4KhwdL2VygKgzAGoYHKngaWgMb4E0,14996
969
+ helm/proxy/static/index.js,sha256=bCjx29j88UnfoeYL4jRYGaqg7fd6o8IePZ0sTl-HRy8,15292
939
970
  helm/proxy/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
940
971
  helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
941
972
  helm/proxy/token_counters/auto_token_counter.py,sha256=Ag368Sb-eLQUMLW7lmWc2EOKN3kgkiCTsYnHNrsf9kw,2071
@@ -967,8 +998,8 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
967
998
  helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
968
999
  helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
969
1000
  helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
970
- crfm_helm-0.5.7.dist-info/METADATA,sha256=TMyCY6K4C2Z3wO2Jh5XVDq-hHQ1xxCArIm31BUeGbgM,23548
971
- crfm_helm-0.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
972
- crfm_helm-0.5.7.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
973
- crfm_helm-0.5.7.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
974
- crfm_helm-0.5.7.dist-info/RECORD,,
1001
+ crfm_helm-0.5.8.dist-info/METADATA,sha256=UCr1ojkpYEsbV8_KfuhviO1vhPRs0fnfz7ADVaqa32E,18414
1002
+ crfm_helm-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1003
+ crfm_helm-0.5.8.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
1004
+ crfm_helm-0.5.8.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
1005
+ crfm_helm-0.5.8.dist-info/RECORD,,