crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (333) hide show
  1. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +7 -77
  2. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +315 -282
  3. helm/benchmark/adaptation/adapter_spec.py +10 -0
  4. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
  6. helm/benchmark/annotation/aci_bench_annotator.py +11 -22
  7. helm/benchmark/annotation/alrage_annotator.py +90 -0
  8. helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
  9. helm/benchmark/annotation/dischargeme_annotator.py +11 -22
  10. helm/benchmark/annotation/med_dialog_annotator.py +11 -22
  11. helm/benchmark/annotation/medalign_annotator.py +11 -22
  12. helm/benchmark/annotation/medi_qa_annotator.py +11 -22
  13. helm/benchmark/annotation/medication_qa_annotator.py +11 -22
  14. helm/benchmark/annotation/mental_health_annotator.py +11 -22
  15. helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
  16. helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
  17. helm/benchmark/annotation/model_as_judge.py +23 -18
  18. helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
  19. helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
  20. helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
  21. helm/benchmark/metrics/air_bench_metrics.py +3157 -1
  22. helm/benchmark/metrics/alrage_metric.py +35 -0
  23. helm/benchmark/metrics/basic_metrics.py +267 -2
  24. helm/benchmark/metrics/bbq_metrics.py +12 -0
  25. helm/benchmark/metrics/classification_metrics.py +19 -1
  26. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
  27. helm/benchmark/metrics/dry_run_metrics.py +30 -1
  28. helm/benchmark/metrics/efficiency_metrics.py +74 -0
  29. helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
  30. helm/benchmark/metrics/evaluate_reference_metrics.py +311 -0
  31. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
  32. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
  33. helm/benchmark/metrics/ifeval_metrics.py +13 -1
  34. helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
  35. helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
  36. helm/benchmark/metrics/language_modeling_metrics.py +13 -1
  37. helm/benchmark/metrics/live_qa_metrics.py +13 -1
  38. helm/benchmark/metrics/llm_jury_metrics.py +13 -1
  39. helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
  40. helm/benchmark/metrics/medec_metrics.py +25 -2
  41. helm/benchmark/metrics/metric.py +25 -0
  42. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
  43. helm/benchmark/metrics/omni_math_metrics.py +13 -1
  44. helm/benchmark/metrics/safety_metrics.py +13 -1
  45. helm/benchmark/metrics/seahelm_metrics.py +14 -1
  46. helm/benchmark/metrics/summac/model_summac.py +2 -2
  47. helm/benchmark/metrics/summarization_metrics.py +129 -1
  48. helm/benchmark/metrics/toxicity_metrics.py +31 -1
  49. helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
  50. helm/benchmark/metrics/wildbench_metrics.py +21 -1
  51. helm/benchmark/presentation/run_display.py +13 -3
  52. helm/benchmark/presentation/run_entry.py +2 -2
  53. helm/benchmark/presentation/schema.py +5 -22
  54. helm/benchmark/presentation/summarize.py +180 -11
  55. helm/benchmark/presentation/taxonomy_info.py +20 -0
  56. helm/benchmark/run.py +1 -1
  57. helm/benchmark/run_expander.py +4 -0
  58. helm/benchmark/run_specs/arabic_run_specs.py +140 -16
  59. helm/benchmark/run_specs/bluex_run_specs.py +1 -1
  60. helm/benchmark/run_specs/classic_run_specs.py +2 -2
  61. helm/benchmark/run_specs/long_context_run_specs.py +2 -2
  62. helm/benchmark/run_specs/medhelm/__init__.py +0 -0
  63. helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
  64. helm/benchmark/run_specs/medhelm_run_specs.py +362 -52
  65. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
  66. helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
  67. helm/benchmark/scenarios/air_bench_scenario.py +21 -0
  68. helm/benchmark/scenarios/alrage_scenario.py +54 -0
  69. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
  70. helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
  71. helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
  72. helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
  73. helm/benchmark/scenarios/aratrust_scenario.py +19 -0
  74. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
  75. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
  76. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
  77. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
  78. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
  79. helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
  80. helm/benchmark/scenarios/banking77_scenario.py +21 -0
  81. helm/benchmark/scenarios/bbq_scenario.py +15 -0
  82. helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
  83. helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
  84. helm/benchmark/scenarios/bluex_scenario.py +6 -2
  85. helm/benchmark/scenarios/bold_scenario.py +15 -0
  86. helm/benchmark/scenarios/boolq_scenario.py +20 -0
  87. helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
  88. helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
  89. helm/benchmark/scenarios/clear_scenario.py +23 -0
  90. helm/benchmark/scenarios/cleva_scenario.py +479 -0
  91. helm/benchmark/scenarios/code_scenario.py +28 -0
  92. helm/benchmark/scenarios/commonsense_scenario.py +32 -0
  93. helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
  94. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
  95. helm/benchmark/scenarios/copyright_scenario.py +35 -1
  96. helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
  97. helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
  98. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
  99. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
  100. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
  101. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
  102. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
  103. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
  104. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
  105. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
  106. helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
  107. helm/benchmark/scenarios/disinformation_scenario.py +22 -0
  108. helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
  109. helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
  110. helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
  111. helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
  112. helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
  113. helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
  114. helm/benchmark/scenarios/financebench_scenario.py +21 -0
  115. helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
  116. helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
  117. helm/benchmark/scenarios/gpqa_scenario.py +18 -0
  118. helm/benchmark/scenarios/grammar_scenario.py +20 -1
  119. helm/benchmark/scenarios/gsm_scenario.py +21 -0
  120. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
  121. helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
  122. helm/benchmark/scenarios/headqa_scenario.py +22 -0
  123. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
  124. helm/benchmark/scenarios/ice_scenario.py +21 -1
  125. helm/benchmark/scenarios/ifeval_scenario.py +18 -0
  126. helm/benchmark/scenarios/imdb_scenario.py +15 -0
  127. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
  128. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
  129. helm/benchmark/scenarios/koala_scenario.py +21 -1
  130. helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
  131. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
  132. helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
  133. helm/benchmark/scenarios/legal_support_scenario.py +13 -0
  134. helm/benchmark/scenarios/legalbench_scenario.py +19 -0
  135. helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
  136. helm/benchmark/scenarios/lextreme_scenario.py +11 -0
  137. helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
  138. helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
  139. helm/benchmark/scenarios/math_scenario.py +33 -0
  140. helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
  141. helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
  142. helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
  143. helm/benchmark/scenarios/med_qa_scenario.py +20 -0
  144. helm/benchmark/scenarios/medalign_scenario.py +23 -0
  145. helm/benchmark/scenarios/medbullets_scenario.py +22 -0
  146. helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
  147. helm/benchmark/scenarios/medec_scenario.py +23 -0
  148. helm/benchmark/scenarios/medhallu_scenario.py +23 -0
  149. helm/benchmark/scenarios/medhelm/__init__.py +0 -0
  150. helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
  151. helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
  152. helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
  153. helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
  154. helm/benchmark/scenarios/mental_health_scenario.py +23 -0
  155. helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
  156. helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
  157. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
  158. helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
  159. helm/benchmark/scenarios/mmlu_scenario.py +21 -0
  160. helm/benchmark/scenarios/msmarco_scenario.py +30 -0
  161. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
  162. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
  163. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
  164. helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
  165. helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
  166. helm/benchmark/scenarios/omni_math_scenario.py +18 -0
  167. helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
  168. helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
  169. helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
  170. helm/benchmark/scenarios/quac_scenario.py +14 -0
  171. helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
  172. helm/benchmark/scenarios/raft_scenario.py +15 -0
  173. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
  174. helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
  175. helm/benchmark/scenarios/scenario.py +31 -0
  176. helm/benchmark/scenarios/seahelm_scenario.py +348 -0
  177. helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
  178. helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
  179. helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
  180. helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
  181. helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
  182. helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
  183. helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
  184. helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
  185. helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
  186. helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
  187. helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
  188. helm/benchmark/scenarios/situation_prompts.yaml +49 -0
  189. helm/benchmark/scenarios/spider_scenario.py +18 -0
  190. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
  191. helm/benchmark/scenarios/summarization_scenario.py +37 -0
  192. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
  193. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
  194. helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
  195. helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
  196. helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
  197. helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
  198. helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
  199. helm/benchmark/scenarios/the_pile_scenario.py +13 -1
  200. helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
  201. helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
  202. helm/benchmark/scenarios/vicuna_scenario.py +21 -1
  203. helm/benchmark/scenarios/wikifact_scenario.py +20 -0
  204. helm/benchmark/scenarios/wildbench_scenario.py +18 -0
  205. helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
  206. helm/benchmark/static/schema_arabic.yaml +55 -12
  207. helm/benchmark/static/schema_long_context.yaml +11 -30
  208. helm/benchmark/static/schema_medhelm.yaml +36 -0
  209. helm/benchmark/static/schema_slp.yaml +219 -0
  210. helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
  211. helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
  212. helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
  213. helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
  214. helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
  215. helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
  216. helm/benchmark/static_build/index.html +5 -6
  217. helm/clients/ai21_client.py +2 -0
  218. helm/clients/aleph_alpha_client.py +2 -0
  219. helm/clients/anthropic_client.py +7 -1
  220. helm/clients/audio_language/diva_llama_client.py +2 -0
  221. helm/clients/audio_language/llama_omni/arguments.py +61 -0
  222. helm/clients/audio_language/llama_omni/constants.py +9 -0
  223. helm/clients/audio_language/llama_omni/conversation.py +213 -0
  224. helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
  225. helm/clients/audio_language/llama_omni/model/builder.py +88 -0
  226. helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
  227. helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
  228. helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
  229. helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
  230. helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
  231. helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
  232. helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
  233. helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
  234. helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
  235. helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
  236. helm/clients/audio_language/llama_omni/preprocess.py +295 -0
  237. helm/clients/audio_language/llama_omni/utils.py +202 -0
  238. helm/clients/audio_language/llama_omni_client.py +2 -1
  239. helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
  240. helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
  241. helm/clients/audio_language/qwen_audiolm_client.py +2 -1
  242. helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
  243. helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
  244. helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
  245. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
  246. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
  247. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
  248. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
  249. helm/clients/bedrock_client.py +2 -0
  250. helm/clients/cohere_client.py +3 -0
  251. helm/clients/google_client.py +2 -0
  252. helm/clients/http_model_client.py +2 -0
  253. helm/clients/huggingface_client.py +2 -1
  254. helm/clients/ibm_client.py +3 -1
  255. helm/clients/image_generation/adobe_vision_client.py +2 -0
  256. helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
  257. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
  258. helm/clients/image_generation/cogview2_client.py +2 -1
  259. helm/clients/image_generation/dalle2_client.py +2 -0
  260. helm/clients/image_generation/dalle_mini_client.py +2 -1
  261. helm/clients/image_generation/deep_floyd_client.py +2 -0
  262. helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
  263. helm/clients/image_generation/lexica_client.py +2 -0
  264. helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
  265. helm/clients/image_generation/mindalle_client.py +2 -1
  266. helm/clients/image_generation/together_image_generation_client.py +2 -0
  267. helm/clients/megatron_client.py +2 -0
  268. helm/clients/mistral_client.py +2 -0
  269. helm/clients/moderation_api_client.py +2 -0
  270. helm/clients/openai_client.py +36 -20
  271. helm/clients/openai_responses_client.py +27 -3
  272. helm/clients/openrouter_client.py +31 -0
  273. helm/clients/palmyra_client.py +2 -1
  274. helm/clients/reka_client.py +2 -1
  275. helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
  276. helm/clients/stanfordhealthcare_http_model_client.py +2 -0
  277. helm/clients/test_openrouter_client.py +69 -0
  278. helm/clients/together_client.py +52 -11
  279. helm/clients/vertexai_client.py +12 -2
  280. helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
  281. helm/clients/vision_language/huggingface_vlm_client.py +2 -0
  282. helm/clients/vision_language/idefics_client.py +2 -1
  283. helm/clients/vision_language/open_flamingo_client.py +2 -1
  284. helm/clients/vision_language/paligemma_client.py +2 -1
  285. helm/clients/vision_language/palmyra_vision_client.py +2 -0
  286. helm/clients/vision_language/qwen2_vlm_client.py +2 -1
  287. helm/clients/vision_language/qwen_vlm_client.py +2 -1
  288. helm/clients/writer_client.py +2 -0
  289. helm/common/hierarchical_logger.py +20 -0
  290. helm/common/optional_dependencies.py +1 -1
  291. helm/common/test_general.py +4 -0
  292. helm/config/model_deployments.yaml +300 -1
  293. helm/config/model_metadata.yaml +302 -9
  294. helm/config/tokenizer_configs.yaml +92 -4
  295. helm/proxy/example_queries.py +8 -8
  296. helm/proxy/server.py +2 -1
  297. helm/proxy/static/index.css +4 -0
  298. helm/proxy/static/index.js +7 -1
  299. helm/benchmark/metrics/aci_bench_metrics.py +0 -14
  300. helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
  301. helm/benchmark/metrics/dischargeme_metrics.py +0 -14
  302. helm/benchmark/metrics/med_dialog_metrics.py +0 -14
  303. helm/benchmark/metrics/medalign_metrics.py +0 -14
  304. helm/benchmark/metrics/medi_qa_metrics.py +0 -14
  305. helm/benchmark/metrics/medication_qa_metrics.py +0 -14
  306. helm/benchmark/metrics/mental_health_metrics.py +0 -14
  307. helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
  308. helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
  309. helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
  310. helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
  311. helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
  312. helm/benchmark/static_build/assets/index-b9779128.css +0 -1
  313. helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
  314. helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
  315. helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
  316. helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
  317. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
  318. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
  319. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
  320. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
  321. /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
  322. /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
  323. /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
  324. /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
  325. /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
  326. /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
  327. /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
  328. /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
  329. /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
  330. /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
  331. /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
  332. /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
  333. /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
@@ -1,4 +1,4 @@
1
- crfm_helm-0.5.7.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
1
+ crfm_helm-0.5.9.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
2
2
  helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -12,8 +12,8 @@ helm/benchmark/model_metadata_registry.py,sha256=7XisV0an_edM8hvP8LSoCnTeUN2QLJr
12
12
  helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
13
13
  helm/benchmark/reeval_run.py,sha256=vImL8JNhveEOftZbRQ6JAxF0L-XCKIwh65M6fIYo4RU,7198
14
14
  helm/benchmark/reeval_runner.py,sha256=bJPl7XVOVwK2fUA7voOVQYwVFEOfKVnrT2tbSGQzQY8,15584
15
- helm/benchmark/run.py,sha256=ZyqkKnqkMqM2AH4HL6sH72H8-mrDWu0NW0piE7BY0HM,13973
16
- helm/benchmark/run_expander.py,sha256=hKFLpmq8W2KBl_mBf-ahHEbt67qZFgu-VxjvidOeQuE,56543
15
+ helm/benchmark/run.py,sha256=n6FYrAOkdCLVzPK-HqNx0MWkdqazviUVqMBAVcpSUUk,14004
16
+ helm/benchmark/run_expander.py,sha256=IMPhg16Yd3diaFRLGYcLCXGO4L_B2WXW69oZP0fx6lE,56857
17
17
  helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
18
18
  helm/benchmark/run_spec_factory.py,sha256=Hxeft3fXoWNz9yGo-2nIfb5pd3GDWlwYWc6YYvAkTjM,7785
19
19
  helm/benchmark/runner.py,sha256=O-91eRRrNgE4_tlCVeLq9_0QsRfNELvaQT-KWtJw894,14618
@@ -25,7 +25,7 @@ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5
25
25
  helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
26
26
  helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
27
27
  helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- helm/benchmark/adaptation/adapter_spec.py,sha256=WrDOvQoeV5Ciw2bmvtnz6HTCAEfjCHTYgfTZwRZzkN8,5680
28
+ helm/benchmark/adaptation/adapter_spec.py,sha256=ONFbOdQiVbffP48_VonC3QgkwtJVObfG7j4wsCNGiJQ,6407
29
29
  helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
30
30
  helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
31
31
  helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
@@ -40,7 +40,7 @@ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=LI7uWpKIHvTUjGiy
40
40
  helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=8LepCkI5b0MOL70pRPGb7vEH0KFMxIlpCQIVIzQT_vE,15030
41
41
  helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=u_GFEgg5wmpate-s5U5aMsmcHuFmreJcA8J0TO1kPCc,14907
42
42
  helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=-fY4mvzoGCCoR0HesT_xf2U2m2arVjgDuj59lm07_tg,1923
43
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=lzmHwvDOHWl9IWC3NTLGfJDbduXtK_zrS2_YoUQmdc8,4464
43
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=nOCuX9lFKb3BHpznhTwpNCO0YsZBNhcMYuFnsLT_u-s,4579
44
44
  helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py,sha256=RV6B3i5juBbJCtPDWzSfma49YXeDq3vQAQ5xQwnH-cA,3282
45
45
  helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=hhH9ehK092j1WdUwrKYSy5PvNJ73gsIu6-5W8aLoYVI,2190
46
46
  helm/benchmark/adaptation/adapters/test_adapter.py,sha256=7Nr6kMK3JN0UjMjjZ6P1fsD5xhOeaqh0D1xI6LFKCos,641
@@ -51,12 +51,13 @@ helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_T
51
51
  helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=MvE7YdIt8Y0nefXLskY9gPmXp7QWi2b8cqg8fxUpzbM,1980
52
52
  helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=KXP9MzDdmUao3uVjPgZYKjZQ_LvGHgZvI-86o3E87xA,6404
53
53
  helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py,sha256=jyL61UxBsIr68hUz-jtjBUnyB2HBp5ESNyECGp_Gf6Q,2129
54
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=ftwSOTPugDuw8vh2WaQDJb0tQAeWR7S7qtD4yE_nOt4,4804
54
+ helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=GP2Fg1kW0-5jCkjgzVkhuN7YBQFyFgQpPTfpSgfbAvk,5178
55
55
  helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=mjjyn9p31V-yt6S8BX7SvqvkQ56D9cKSff6d-daM6HM,10250
56
56
  helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=6nuz0Vn89A1mOedutsiq2SwTOG3qn8dUZTiaXhKffiw,3587
57
57
  helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- helm/benchmark/annotation/aci_bench_annotator.py,sha256=SjXidlbpm5HOhdhNXg3HjabMEQvt3hq1iJ5GPajxt8M,3228
58
+ helm/benchmark/annotation/aci_bench_annotator.py,sha256=aAzXqbjj_3bv0-ATCrFu4JvrsqORE5lkYpgxtXAEGSA,2777
59
59
  helm/benchmark/annotation/air_bench_annotator.py,sha256=CDyHVwD4eoymfLduJC5WvvhDX1DOgYBqgjvqBjoCfU8,3501
60
+ helm/benchmark/annotation/alrage_annotator.py,sha256=3DcHbD8WXTg5PN3feipHTsFls0v5owMyb_rqpNWokls,3531
60
61
  helm/benchmark/annotation/annotator.py,sha256=__BkMVpAEpSs1pbwPK5sVWLdCAXnjsHcPYgmOqmNPu0,1843
61
62
  helm/benchmark/annotation/annotator_factory.py,sha256=8uo5uz1UpIVCHUd7CRvmy6b9XB1gspdHmgxH5UZMPVI,2335
62
63
  helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
@@ -65,28 +66,28 @@ helm/benchmark/annotation/autobencher_safety_annotator.py,sha256=w_xjZmY1zuLjVvV
65
66
  helm/benchmark/annotation/bigcodebench_annotator.py,sha256=CJG2pn1DeHJCp3yHETRquNIkCHfd6ZNuOiUjG1cQ_JY,4448
66
67
  helm/benchmark/annotation/bird_sql_annotator.py,sha256=FQDZs1-O1jfJOET0eDeU7lf5xLaiMPohC5BdmQ4XkzI,2436
67
68
  helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
68
- helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=6ybNBvJi59i0cpAhI_fLwXoSnqhAH6m7Lo6ad_PufBs,2966
69
+ helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=R6Hexh20T6WBBRBhwLhQv_IQvW7Z55Pf9IYBCWxUTaQ,2517
69
70
  helm/benchmark/annotation/czech_bank_qa_annotator.py,sha256=YIH5g4zHe3BQF2Y-6uRVw7g9u_SPBncqBobdvZdIzyA,3096
70
- helm/benchmark/annotation/dischargeme_annotator.py,sha256=Z6xnUK1cNrFco9x0w8B_qhlLOEZrzXBwT6TKZPKoPBk,3676
71
+ helm/benchmark/annotation/dischargeme_annotator.py,sha256=blP76BgwmbHDDDRdaaGwtTHfukCvXXLN72vjGj_LI_U,3225
71
72
  helm/benchmark/annotation/ehr_sql_annotator.py,sha256=Izpq0biZ9lkJOPk6NwTuv2wk8Bg88vj56BKZrY8XhT4,4021
72
73
  helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
73
74
  helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
74
75
  helm/benchmark/annotation/helpdesk_call_summarization_annotator.py,sha256=I7TjpN502Sa-Z4uUKemJXSAdOiVA3MMO92YIAAXeDBg,6034
75
76
  helm/benchmark/annotation/live_qa_annotator.py,sha256=PSff59mU_t3ypmptYsYRKU3m1vMLF0dMyUySIOxBrPw,3553
76
- helm/benchmark/annotation/med_dialog_annotator.py,sha256=OVTFIlvdhcOr_hdK0tnrDes9hYdN1mDWFTp4GDYY7O0,3162
77
- helm/benchmark/annotation/medalign_annotator.py,sha256=8edAZh8oQgDKUT1bQ3Hp2NBE-QnBZ_-ZQjHkV7YKWhs,3240
78
- helm/benchmark/annotation/medi_qa_annotator.py,sha256=v8e6hkHZX1x9KtTedCnpCseh-Y72z5kUgUrXHWPUkX8,3074
79
- helm/benchmark/annotation/medication_qa_annotator.py,sha256=uZ3VpJ0nsDyF70_kn8kSSBPr4OlfiNdZC7q8wq_jJFE,3090
80
- helm/benchmark/annotation/mental_health_annotator.py,sha256=JwgSeXtwf4KFZxNtAxsnqdLJQSvP-F-ZoCcCWdasrMQ,3275
81
- helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=pwwniNlu5VTa1ZdyO0KFcMFZcpqM5CjguujgSpEGslw,3174
82
- helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=zABO1FJH9pOFhUe5vc2B-c14Hf5RsuU9jQAGiMg6G0I,3204
83
- helm/benchmark/annotation/model_as_judge.py,sha256=FIJOUzIhf2QpxqFf6hjgAM5hPEm0VlXzB-jiHJUrPDs,11985
84
- helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=qqWHY2HfCwMP5GqvObS3JpMIYVs4yyITCsA1B7lcDks,3201
85
- helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=TUxNzJcItErsw0gw76hiKZAWeQTNHGHnC0qf-_CGeF0,3316
77
+ helm/benchmark/annotation/med_dialog_annotator.py,sha256=uGp8d74WGgOOiexpoKj5CMdr5jOvAnfe-ZLKGSHT6ng,2711
78
+ helm/benchmark/annotation/medalign_annotator.py,sha256=glAPpVdIfebm39GhrBY3BE2hdofVBIBXUxPU3_qqZOw,2789
79
+ helm/benchmark/annotation/medi_qa_annotator.py,sha256=bLXxXe-obPvud15sPrqp9i-wSq1QqguCPt_UJaXRz_I,2623
80
+ helm/benchmark/annotation/medication_qa_annotator.py,sha256=98XU2VVSoQ8XlAkuVKWnNBOS76X_lIviq_A-nyrlqcw,2639
81
+ helm/benchmark/annotation/mental_health_annotator.py,sha256=08b_XqgfSpIhutDUaaSgVRdiZB6metAQQ_WHF8U2-c0,2824
82
+ helm/benchmark/annotation/mimic_bhc_annotator.py,sha256=a9AHMFY2shV4I2qVUfKnOvZFbmQjL5vPKsbytTBfU0A,2723
83
+ helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=eu9rZhRAXVbo0j7BP7vuAKwGkuwhTCvVRvJ4dPbcR4I,2753
84
+ helm/benchmark/annotation/model_as_judge.py,sha256=eZZlyCrW6U9a8bHhaPrbV1AJ23q3uP0ho1NbVErGBXs,12160
85
+ helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=ZgJVtNpab3BrMs0ZXFW6L0CNp1Hcqfgv7FHP4rpxFPg,2750
86
+ helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=VtHiEGFZLUsd3zkgnSoti5itZnDPgERMPZlORkEp7ok,2865
86
87
  helm/benchmark/annotation/omni_math_annotator.py,sha256=PvZZb1oGw60qT-oHRIs93AZbh5wTbpsmD8BforudFhA,6144
87
88
  helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
88
89
  helm/benchmark/annotation/spider_annotator.py,sha256=B48ylGg5J7xuTSUio7VztdXk3lI6ilMqrUvAD-ve0sE,621
89
- helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=5jU-dK_0OvB_jXNLDZtQ5E3gaSUcAxFNzv6prA17eAg,3186
90
+ helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=Te9rQhcUV-T2I4oBCBzInAZW65EV3lv0LXLPgGzLd8c,2735
90
91
  helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
91
92
  helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
92
93
  helm/benchmark/annotation/wildbench_annotator.py,sha256=OXR59zdKw9W7v3Q_sFnt1cEPN3nOzQDVqSbh4jDbEUs,5457
@@ -126,17 +127,16 @@ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8
126
127
  helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
127
128
  helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
128
129
  helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- helm/benchmark/metrics/aci_bench_metrics.py,sha256=fAuTm8Sr1vvyd7Tjcz9WWKrFkqrwCV-CiF6lqUO3dKU,442
130
- helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
130
+ helm/benchmark/metrics/air_bench_metrics.py,sha256=WvfjjHLSE567Y7BC8tGlMINBwP-d1URRUZcMUF1yf1g,171277
131
+ helm/benchmark/metrics/alrage_metric.py,sha256=4QHtL00aEIRYQx2QkDs5uldu7ZAkbFYMALH6DL9LSJg,1233
131
132
  helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
132
- helm/benchmark/metrics/basic_metrics.py,sha256=d0iwYnwrbF7w7CFtazx8vPIsZnj51U2PVVoscCb-HJA,20495
133
- helm/benchmark/metrics/bbq_metrics.py,sha256=GeZhSSJzqGD0e5EAiRHitIC3XtPICF7rDI6GfeYQc8E,6201
133
+ helm/benchmark/metrics/basic_metrics.py,sha256=3y1M0mFJL8FlkMkQWWs4ZV2NiriaMGydddbeY3F-vXk,30547
134
+ helm/benchmark/metrics/bbq_metrics.py,sha256=oHd4U6Q5sv2h0UtVnAJ2_cf32XiISWaDvc-2y0fU-gk,6574
134
135
  helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
135
136
  helm/benchmark/metrics/bias_word_lists.py,sha256=eyk6we2J4SW8ZaZxQUWLB7Yapn92uM5TCekhFB5vg-U,13908
136
137
  helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
137
138
  helm/benchmark/metrics/bird_sql_metrics.py,sha256=ooCuXW5nPpRs_-4seCONQmn25DzTbcUgGXznXTK9y0Y,1153
138
- helm/benchmark/metrics/chw_care_plan_metrics.py,sha256=WOAdwuF4vusZhjaXSAB3r7PD_ZxeNmVu2oAmOqzVLtU,460
139
- helm/benchmark/metrics/classification_metrics.py,sha256=1Xa_bO4PqIAV2iZitE69kc4VKS4A7PloG5ElZAgvmh8,8851
139
+ helm/benchmark/metrics/classification_metrics.py,sha256=CfkyMiiWo74VbIB7eEhNxIcPbGA_imbzETrAExqn5WM,9498
140
140
  helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIefAoi_edUwKpp-XxYbmeU,2740
141
141
  helm/benchmark/metrics/cleva_harms_metrics.py,sha256=xVubv2pG3iinVs3namoVHWAmV9oUPywZwFB_0JGhP_w,11277
142
142
  helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
@@ -149,57 +149,47 @@ helm/benchmark/metrics/codeinsights_edge_case_metrics.py,sha256=B7EEELwwH67Vxmgr
149
149
  helm/benchmark/metrics/codeinsights_metric_specs.py,sha256=BkKWII9yTkChdZVsGeeeCbiWQDYvvcAKo0nxi_RTTUk,1798
150
150
  helm/benchmark/metrics/comet_metric.py,sha256=EJWZ9x8CGeDDQlfxYrY-np_NVJBt5gun0XLJvtpjXVI,4798
151
151
  helm/benchmark/metrics/common_metric_specs.py,sha256=JKqmO4ovBdfOYKC-00OSzOMv--g9NTCVfUHLaz-1Uns,6025
152
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=Zrf6HyH_WNe7gGFgW0j8FJlX5KZvbk-05iX8QFPJDyU,2656
152
+ helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=F2bfg8XbjH3WOQ0O_c5S7UUxgpzu7AD5wRtNdNcJlUs,2997
153
153
  helm/benchmark/metrics/copyright_metrics.py,sha256=RYOWKFN97UCD2Vj51gzKGbnnY9wAq6KJgiRt2cecVfs,7824
154
154
  helm/benchmark/metrics/czech_bank_qa_metrics.py,sha256=bKoooK2T5v_fFKNbUnsuW6Mv9muAirJD5lTrzuHfpz8,1113
155
155
  helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=x66XP0iQGk4ThT7ddmrlLCA0XF4arRbQMDT42LHf2kE,3297
156
156
  helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py,sha256=TxTkkWdx6d6ym0MirZTiucl_TWFdn4uJLnlTfLjQvgk,2925
157
157
  helm/benchmark/metrics/decodingtrust_privacy_metrics.py,sha256=OU7lka-hm6PubR5Gjj4uNyrqhjlfhe0mmjBCAz9vlRs,3456
158
158
  helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py,sha256=bW4zafRyKFa__8fGrdiTPUu848ovNnvakLCfqcMrcHk,6461
159
- helm/benchmark/metrics/dischargeme_metrics.py,sha256=D8LI52E17hNSPDpEvb2tw1za4QWDE3p9xgx7Nm9l7_Y,454
160
159
  helm/benchmark/metrics/disinformation_metrics.py,sha256=5n8wgRBb6FaDjqe1nR3Cj9aS48esmMsIUq4KpBHoQoU,7870
161
- helm/benchmark/metrics/dry_run_metrics.py,sha256=Ss0lzf944HIbL1CX6QuJpGFPqOzhBT0qVWLNR1BoEjk,3784
162
- helm/benchmark/metrics/efficiency_metrics.py,sha256=SJqpA1d_GfBPl9H6moai8ra1GVe7tlaCfg3PeiWT54c,11845
163
- helm/benchmark/metrics/ehr_sql_metrics.py,sha256=YRjvPIty7zlyoyGD6wo3HYOz7y_PThySOZzVRJ38iww,4797
160
+ helm/benchmark/metrics/dry_run_metrics.py,sha256=ouS6_8lESuCGSQgegN4xKKyoGr7Rb1K-dufHPT1fDwc,4886
161
+ helm/benchmark/metrics/efficiency_metrics.py,sha256=VnM5PgxxK6UKk9MzPprnN_7d-t6xVlIgFMQYrFh8dwY,15262
162
+ helm/benchmark/metrics/ehr_sql_metrics.py,sha256=yyz-2tsk4Fu6D5ELp3cbLaAWGjqtDGrUdvFvgHvxevg,7418
164
163
  helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
165
- helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=T3pftPfYEUR88NEZEZuzYOTNoHELo7nSbz4qmxN8oQc,19628
164
+ helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=kVYKCFX6LiG8ucA12Ib3RAkDd1kFaSONEtvgIatvIrE,31884
166
165
  helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
167
166
  helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
168
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=HRRKkcTbCu5ScOVwmjzYaA7UAEGE_AJUZVOCDRuv4Po,4321
167
+ helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=Lkil9DRtO3NS3zr5Ef_qqGxZBL-ObCNpbKoJvMhCrb8,4762
169
168
  helm/benchmark/metrics/gpt4_audio_critique_metrics.py,sha256=L9tGFwvl1-Ew3MdInQ7KPa8OlI5YexIB2KuCYVYsuPY,7023
170
169
  helm/benchmark/metrics/gpt4_audio_refusal_metrics.py,sha256=vYPRJq-4uNhUWUWMrDkpHmfIBkhEyAgaMNEI6RKPP80,5896
171
170
  helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
172
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=9-kB3NeBacI6nxs2oQ7Km_1SHyiz98UVZuR8PAlvCHM,1442
173
- helm/benchmark/metrics/ifeval_metrics.py,sha256=4_Vp9bNnrctKtv6xZ1RpvBstPAZPwv1xiohH-ogs99U,2565
174
- helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=RR9cMIG113oXUnBjU_denn7DaCGB11k1oGtQ5dQON3M,9874
175
- helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=1GsW-nBz8TgP4wFIVEGA4_BhI17kihmk96zuLpD4NZc,4636
176
- helm/benchmark/metrics/language_modeling_metrics.py,sha256=yS7k8iFjxfkckSBA0RVA7VdOivSEBtNzCjczK6We7y0,4598
177
- helm/benchmark/metrics/live_qa_metrics.py,sha256=f2XFmQaohjQNqYqNg8NcDVavCzyP4cd8Cl8rLArn9EM,816
178
- helm/benchmark/metrics/llm_jury_metrics.py,sha256=yzAsdacyX0MFJy2qKIjhI0y7JvtflELpCh6R14wuCgk,1704
171
+ helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=5Z43F9ZI9OHBxeZENBGSE4fB1YTo1NKOquPt_Sw-F5s,1835
172
+ helm/benchmark/metrics/ifeval_metrics.py,sha256=33IqTVdYlX9ZI6sR-FfFAKbVJ9tAGDNqZpLHS5yInio,3036
173
+ helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=AK_ZpayimVZ9MxX8CJG-K1uPKo2j1dNJ_H9uSz1CWiY,11612
174
+ helm/benchmark/metrics/kpi_edgar_metrics.py,sha256=rnvVlvFgWwaavaIu9n8iVlODhkk2g3liOiK7kwfGbN8,5474
175
+ helm/benchmark/metrics/language_modeling_metrics.py,sha256=NK8vYLFyFAidDG8UXVkP242zbg_6W6EZ4xZPNbokGlw,5001
176
+ helm/benchmark/metrics/live_qa_metrics.py,sha256=YGodrQ-b9ucQTK3ICKXRla5r26RR0wxC4iPOTcYrV1k,1195
177
+ helm/benchmark/metrics/llm_jury_metrics.py,sha256=-5w8tFG4JE0cMcH3KS7xQ1z6mbdtDf7reCMz6u5vtag,2158
179
178
  helm/benchmark/metrics/lmkt_metric_specs.py,sha256=0Fa0xLjQDXwsRCE5VqGzEfb5ZdzKsDoSCwR_zHogFcc,376
180
179
  helm/benchmark/metrics/lmkt_metrics.py,sha256=GaZTfl-NQXa1YSzcJUGlZ5wZURH1CnJxGkPFBj8ydTQ,1856
181
180
  helm/benchmark/metrics/machine_translation_metrics.py,sha256=22vaGBCSw12uM1wmtDG-MBBZW8OiTZwNPaerjckdtDE,3860
182
- helm/benchmark/metrics/med_dialog_metrics.py,sha256=kzmrkQcmJ15zuOF9_Onk9N0oeNeyl9Rri1JEb1AqRT4,447
183
- helm/benchmark/metrics/medalign_metrics.py,sha256=q6l8p5Pie-H9pxhaA-lQkSOnliJWXr6zUeN8syEQ91Q,439
184
- helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=9wZgg20-9QBNk0_XhuwR3LT940fqDPkCM4Kl0dPkbAs,5353
185
- helm/benchmark/metrics/medec_metrics.py,sha256=hNBOGX52G_QOmgTCp9LnIMrmGSRxbb5vgjxKU069TMQ,4152
186
- helm/benchmark/metrics/medi_qa_metrics.py,sha256=JWAEMuT0UXDZrb7qHn13W6W79ilbprk492V_9vWrB4s,432
187
- helm/benchmark/metrics/medication_qa_metrics.py,sha256=wit3nKNWpGFfgauu6Xye2IDTePAS0VHAQI_7OO9HR6M,462
181
+ helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=2viECYEj8y65_w5MPH295Z1OgLTNrgP_iMzzYSgc2hQ,5895
182
+ helm/benchmark/metrics/medec_metrics.py,sha256=5z3HKZCEuQsOix-22PPzTHhWlYmjyHOAVFV-bgGUVJE,5137
188
183
  helm/benchmark/metrics/melt_bias_metric.py,sha256=mHDCkRGLD-0pyJA_depi_KX3sn7g7Bgd3_m0XdLQahY,11520
189
184
  helm/benchmark/metrics/melt_bias_word_lists.py,sha256=xA0araUdszAIOqfxiTi6MIJhKYwr_Gwsc1L9qinZx9U,27891
190
185
  helm/benchmark/metrics/melt_metric_specs.py,sha256=zaeV57LQEl8qK7be36NaojiUJlzmkoKY8JyOkOVuPqs,1619
191
186
  helm/benchmark/metrics/melt_toxicity_metric.py,sha256=ni6bb_QC51NM5jQpbFYLWtsQy3tNOLwQ_5b3PDV5vVk,4193
192
- helm/benchmark/metrics/mental_health_metrics.py,sha256=4HXCXl2GxFPn6wDzHptHeBTuP4BJVLUzEUKffpd5R_k,462
193
- helm/benchmark/metrics/metric.py,sha256=jqQyiKDq_pQv-ulGqfZI56ydRDQs3N3XhfHIPysUhrk,14311
187
+ helm/benchmark/metrics/metric.py,sha256=gF7KlWPoPIGUvbvqDeXagBNBZnl8rclh8JfgCPvuXvs,15065
194
188
  helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
195
189
  helm/benchmark/metrics/metric_service.py,sha256=bJaM7GisEgSWR3vPTcg7b67XF9X2K5viODacIgbGb24,1692
196
- helm/benchmark/metrics/mimic_bhc_metrics.py,sha256=da1YYrE8fL3YHeIJ9hf4WCKZtuj_8cksm3rJ24rcy70,442
197
- helm/benchmark/metrics/mimic_rrs_metrics.py,sha256=x3vSj1VG1UkNF3gbgJYDeA4z-crxfGIkK7iZo0xjq8c,442
198
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=Pu9efXoBrhsvxSeGHqwbUA5k365-pJTeXpMNhmcg0L0,3927
199
- helm/benchmark/metrics/mtsamples_procedures_metrics.py,sha256=XrddVk-gnc8jF8amCI1RBa_XTS9yEXD2Y9Ld9W7Q-m8,497
200
- helm/benchmark/metrics/mtsamples_replicate_metrics.py,sha256=rmH34aTX_wZWxLi4jrxf3sR1RIqNRF0QDANLRQUGhqM,492
190
+ helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=3kypTnrkbdG-Dpdbg_A_WQYVx35ylvZFjh2-R5wvhSE,5347
201
191
  helm/benchmark/metrics/nltk_helper.py,sha256=QMEps-lqJZ_pCgvjlMf4BvC0pzDu3ez5jit5F4p8dAk,1313
202
- helm/benchmark/metrics/omni_math_metrics.py,sha256=Gqih87UrE93-a0hbRhTBkjmfGLNTkuKQGaG-sTQeuG8,1287
192
+ helm/benchmark/metrics/omni_math_metrics.py,sha256=WF0cWpmJwduTdZw7c_O5QsXDNwet5GgHYV0Ww9PfKc8,1709
203
193
  helm/benchmark/metrics/openai_mrcr_metrics.py,sha256=TAop7G50FKaR-Jyo2EGLqmMOfJRmS2vNRDFiifa6mhg,2313
204
194
  helm/benchmark/metrics/output_processing_metric.py,sha256=ey9UBi2f3780OwFlp82ymzfjLR3MA2fpA9vW5R4W5TA,2581
205
195
  helm/benchmark/metrics/output_processors.py,sha256=ULZlDBOf6NupAXzDKBKyTDdgPZ5PSxOAlOYTbrQEek8,472
@@ -209,24 +199,24 @@ helm/benchmark/metrics/ranking_metrics.py,sha256=hSNKy4h7zRkGYSgo6RWt4PXQztA5ZX1
209
199
  helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9_Gywuvb1r9FLE,6092
210
200
  helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
211
201
  helm/benchmark/metrics/ruler_qa_metrics.py,sha256=OuiA0ksByl0Tw1Oal7zbedhKjTrhJgQJDLXAgoTLXuc,1473
212
- helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
213
- helm/benchmark/metrics/seahelm_metrics.py,sha256=egRkeXnnb8Nqi9qJJMDXJRSl4NK6WvdUxAc_LffBips,6964
202
+ helm/benchmark/metrics/safety_metrics.py,sha256=PZjyNsxiBe4VTdIujsqrLUtsQfLUpcm8snlAk3g9zWA,3870
203
+ helm/benchmark/metrics/seahelm_metrics.py,sha256=GlNoK1O7kcuiuEOJEgTsnrfK9TcGwH7-tPj6Qe6JV90,7493
214
204
  helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
215
205
  helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
216
- helm/benchmark/metrics/starr_patient_instructions_metrics.py,sha256=YHdTeIFdZxRbvqBnlWpAyIsWzZyWAjjDFuKOXhHYiSM,525
217
206
  helm/benchmark/metrics/statistic.py,sha256=ATuOm0jU3L-0ELiZaF2GVMNF22W66-rMvzxRtlfqcII,3446
218
207
  helm/benchmark/metrics/summarization_critique_metrics.py,sha256=-mki8-zvZx54dQg8X0BG2Y6wmfypQhkIuD_9ZjNBl78,4782
219
- helm/benchmark/metrics/summarization_metrics.py,sha256=FJCdGRmlCJX5A-AmbtpGGlGRfNgg5Z8Bo0d9yFiE33E,16876
208
+ helm/benchmark/metrics/summarization_metrics.py,sha256=S99uhtvBtH0UQS-gDEuQLLTPYNG-dNUV1n3OnaOP7p8,22647
220
209
  helm/benchmark/metrics/test_bias_metrics.py,sha256=qEZsCULvwjVdIyfNgJSc2L7Xp9suKKW7L5OuQmGrwZ8,6393
221
210
  helm/benchmark/metrics/test_classification_metrics.py,sha256=CRDMGmVmzEUnNaM0C02qUTOU2AS11Mt2-GdEl89y7lw,9541
222
211
  helm/benchmark/metrics/test_disinformation_metrics.py,sha256=U3ZmS9s33oimTQbKO-7pgWeX_WiDB9chlOCtf_vslXw,2249
223
212
  helm/benchmark/metrics/test_evaluate_reference_metrics.py,sha256=B7xtDDWPAxF7d-vcUx_R51hFMae-DD52nUwbu_eWt6Y,1601
224
213
  helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLpwut37DmA,771
225
214
  helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
226
- helm/benchmark/metrics/toxicity_metrics.py,sha256=ZLOzxDlMgbljl-9y6vT2ZgwdhsBZ4MfV-T66VpKk00U,4114
215
+ helm/benchmark/metrics/toxicity_metrics.py,sha256=s5Ypodu4cBmIc_fCbbQ9kCqcvVJf-OQ6zAvb85r8Cv8,5509
227
216
  helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
217
+ helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py,sha256=dSJXAS7--n2sxRaajDo20Omzwx4LY5x0gd8nTxX3DAE,2317
228
218
  helm/benchmark/metrics/unitxt_metrics.py,sha256=8fawxnrg0xsAe0xO2wbL7S_yisj8RzJnrn6xtk8C6q8,4852
229
- helm/benchmark/metrics/wildbench_metrics.py,sha256=sY7MNTzRlJJK3yph3rCijgbMaajtLyCCquThlsoE5wU,1380
219
+ helm/benchmark/metrics/wildbench_metrics.py,sha256=THOguxE6GUun0zTr-pITXfQGEd664sScrfIzFGdNPXk,2163
230
220
  helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
221
  helm/benchmark/metrics/ifeval/instructions.py,sha256=qNoa1vMPDNz6ORWfyMv_efwKZ4U5zkI-cf4aApyfSqU,53247
232
222
  helm/benchmark/metrics/ifeval/instructions_registry.py,sha256=NprvkRQz0QWaIpJsFp95CQCWsnuY_57ZSqFn2IISDP8,7555
@@ -267,7 +257,7 @@ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBS
267
257
  helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=Ir4u8blJWTRtEBogb6u22qCy3JXAIzvx-Th6dSBLfdw,698
268
258
  helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
269
259
  helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
270
- helm/benchmark/metrics/summac/model_summac.py,sha256=ccOP0z4WEpR26iAzzTWviFfX33Cg9MdpZgKgSRQc9D8,17445
260
+ helm/benchmark/metrics/summac/model_summac.py,sha256=zheAPIJAz5MH6GU1gXpWSc9Q9gouhNzYx92PDd5PUXU,17447
271
261
  helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
272
262
  helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
273
263
  helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=XDZGK8h84F2w_pK8Zjko8ssKZmVxKFqTOuHL0mLBzMY,694
@@ -286,11 +276,12 @@ helm/benchmark/metrics/vision_language/image_utils.py,sha256=xwtydR8-s23cJacIGXD
286
276
  helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
287
277
  helm/benchmark/presentation/contamination.py,sha256=07IuIP92vfuI0GwfeNC-i_NZUlF8N1azzagC19YHOMQ,2802
288
278
  helm/benchmark/presentation/create_plots.py,sha256=bM6UNzH0Bx8Bv2iKcyMoYp7IwfCZSQob-w_XOOI6r1M,29090
289
- helm/benchmark/presentation/run_display.py,sha256=LmY2HES4dU94kRYuUxt-c9LTMDN6MU5CspWTF6rZwDo,12419
290
- helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
291
- helm/benchmark/presentation/schema.py,sha256=j3gOhj-okQ4qzYoMh5N3ltsL0OXiOGuB7ydF-SI-Ug4,11229
292
- helm/benchmark/presentation/summarize.py,sha256=_d3gd45eBpx8yMnVq1XgF9D-pPMcpbuwseSZz4giybo,60092
279
+ helm/benchmark/presentation/run_display.py,sha256=byOcVknL6UgwSBGWUPiWEdSBRbW6PYwmo7vJ1Ms50iY,12890
280
+ helm/benchmark/presentation/run_entry.py,sha256=_hgsKMpZ-WpgaK7nta68GohXe07JCyaWD6jRjINujXk,1182
281
+ helm/benchmark/presentation/schema.py,sha256=AMGmEwqxkHoZFkOKD-UVZ8aXwgbafG6KYASsWo6YEw8,11005
282
+ helm/benchmark/presentation/summarize.py,sha256=m3RSw6ogUFasdeZ8xSUh4wKV-nYzVi3iQv-KrrwtDFM,67828
293
283
  helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
284
+ helm/benchmark/presentation/taxonomy_info.py,sha256=pPIFOicis9H1sWeXApfsHHcqZpus1ezukxLQO7Lj2Vg,473
294
285
  helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
295
286
  helm/benchmark/presentation/test_create_plots.py,sha256=1FrJZnPW-5QUQKt_pf4y47uDha4B8wHyY1o5hqhKWhc,1293
296
287
  helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
@@ -299,12 +290,12 @@ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9i
299
290
  helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
300
291
  helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
301
292
  helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
302
- helm/benchmark/run_specs/arabic_run_specs.py,sha256=p5KPvcugJI3ERYhO7Le_aiKOZ4IM2EOvsXEmZE8R4Wc,3014
293
+ helm/benchmark/run_specs/arabic_run_specs.py,sha256=x3pBNbUcYfx6f0APXroLBQodOgv6oWuJNb301c_QUhg,7768
303
294
  helm/benchmark/run_specs/audio_run_specs.py,sha256=baJz5LZiwWZP3KD0hluKgpidtswzdorQnshX0CoqKAc,23383
304
- helm/benchmark/run_specs/bluex_run_specs.py,sha256=OHweBHS8JC-k9_e5Zq1LUU2FZhJ2P7SDshatX-N15Ls,1798
295
+ helm/benchmark/run_specs/bluex_run_specs.py,sha256=jwrH33YeXqoAex11071XMUwTCKNkoJTQQS7iNoJDLmg,1797
305
296
  helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
306
297
  helm/benchmark/run_specs/capabilities_run_specs.py,sha256=sbqhIj4AoujV45erwoVK61lWdlkjg4qssmGlu0eSr1U,12067
307
- helm/benchmark/run_specs/classic_run_specs.py,sha256=4DA-21Tiz87dQ_iklyrKpfsyTw2f51tbwtRvv3Zs57s,53727
298
+ helm/benchmark/run_specs/classic_run_specs.py,sha256=fe98HhzMkfloKpOZbi_mIMp1Hi-clv22rgWT-EdS0e4,53743
308
299
  helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
309
300
  helm/benchmark/run_specs/codeinsights_run_specs.py,sha256=lz3yysrPjCIiObzrIkRjJsWzkABh9qIXn-o7FSqZPl0,9207
310
301
  helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
@@ -318,8 +309,8 @@ helm/benchmark/run_specs/imdb_ptbr_run_specs.py,sha256=nkW5A_xeD5kCKeJVxsL8RFS8r
318
309
  helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
319
310
  helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
320
311
  helm/benchmark/run_specs/lmkt_run_specs.py,sha256=tNZvlA4mXUX-NBC9enRR90qFLeh8SNGFq701rXmXc18,5376
321
- helm/benchmark/run_specs/long_context_run_specs.py,sha256=A3yhg1IEds7kQWxkRYH7WVkMPouA1xDz28uxpHgwJvE,6229
322
- helm/benchmark/run_specs/medhelm_run_specs.py,sha256=--KgkjVwKt4uyiTebalrbeGV4FB-jGqPciYjFZED7zA,43407
312
+ helm/benchmark/run_specs/long_context_run_specs.py,sha256=wn7yY9rMIBJY30SN-275qg9U49aGPUl4hVZphKYFkBI,6442
313
+ helm/benchmark/run_specs/medhelm_run_specs.py,sha256=CGFHmoQB58gpqi4b4BbffuHzRBs12aeq8suUkaaFbqc,53262
323
314
  helm/benchmark/run_specs/melt_run_specs.py,sha256=729MkALud2wG07yulx9zqAzejdXW_eVGkfF5cQWeGGY,32031
324
315
  helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
325
316
  helm/benchmark/run_specs/multilingual_run_specs.py,sha256=umf8e6ZDgRXiU0G_BPoovj1UZ_dxyrXtIQ7i9WC6USg,2296
@@ -327,41 +318,46 @@ helm/benchmark/run_specs/oab_exams_specs.py,sha256=ws7Vppo_zJvxKqQ_sNhm9N7-5eQbX
327
318
  helm/benchmark/run_specs/safety_run_specs.py,sha256=3X6tYaq2SlRsZs9q6SCtBUgjNEpOwUtV6M7iY2Kowm0,6807
328
319
  helm/benchmark/run_specs/seahelm_run_specs.py,sha256=R3mg4_OoaRizZ5n0FHcUQpJLny3j-ulBlHzOyF0a0Ok,23904
329
320
  helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
330
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=P1mxSu7ErjiK0ARbRmbIzFwYO3fC-6QpsZQeez4U3qI,7346
321
+ helm/benchmark/run_specs/speech_disorder_audio_run_specs.py,sha256=Hx0BxdzORXU8cyEGFYJJWs60Ssuny6tIpWqCR6fFSfI,7464
331
322
  helm/benchmark/run_specs/sql_run_specs.py,sha256=JWCICELKi81m11MggyR6CJNl3vpWPwk4kr8DZSsWvj4,1965
332
323
  helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak52ycvu8DAnhabw9rc7yA,1129
333
324
  helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
334
325
  helm/benchmark/run_specs/vlm_run_specs.py,sha256=v-eWuDYc8u5HO46isLONPfAWv5zdA1ZOQrdyOvX3vlU,37512
335
326
  helm/benchmark/run_specs/winogrande_afr_run_specs.py,sha256=dhOm8z6Q_ZpnzYKrsS0nEbRQPWs_phkXxmL5pxCJzQA,1853
327
+ helm/benchmark/run_specs/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
328
+ helm/benchmark/run_specs/medhelm/benchmark_config.py,sha256=O1D5N4q1QwzrI1ioAQK815cch6hNoJoaIzzAlJo6GXk,7860
336
329
  helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
337
- helm/benchmark/scenarios/aci_bench_scenario.py,sha256=W8h7eWz9mjR0kRAffKWSnA1Fs8t2l83sPyW8fjPOxWQ,5670
338
- helm/benchmark/scenarios/air_bench_scenario.py,sha256=B6_WMowLFe4gWfnoFA_yrHe0kagbIkZabEnK4kGGqSU,1884
330
+ helm/benchmark/scenarios/aci_bench_scenario.py,sha256=ry22AJdd3lvQuEFdzNf6wXzMyPFn46b0kScrYdpj-nA,6783
331
+ helm/benchmark/scenarios/air_bench_scenario.py,sha256=Ufcpxm5KaXHI2FfK4tdQsURaCSdcWNcXVaNmYkE4bo4,2820
339
332
  helm/benchmark/scenarios/alghafa_scenario.py,sha256=FJXO3W6qYzCgLJMSiJEhpddNcFyR3N5Brh8pATW_9GM,5217
340
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=jnUGbppDGEsbe5xoJjmv7nW_RvwPIYm6cwSULeqk2Fk,5133
341
- helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=_OWE33eVRaZI0gmfP7bLd572uOi_6jb39z_J6nkcvfg,3182
342
- helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=mI6ttMFAT3sH_v87qVNxYptqDS2EMUhK0b8vpfePSdY,2807
343
- helm/benchmark/scenarios/aratrust_scenario.py,sha256=G20j6Z-C_6bUJf-bpdyUN23Hb7XK0YtieUprq_5Z5hA,2552
333
+ helm/benchmark/scenarios/alrage_scenario.py,sha256=MN-gMQboAaJCasYNg_rLJVgcrk5KZ1WCBN9R_lyRrhE,1499
334
+ helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=EzS8td1lJE1yxEwFtuwTbjHtHm1hGIaur93BKAL_Hm4,6212
335
+ helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=3axwlXnKI0-mRDaYfD-hcCNZwtw_1jte_5THuyuV7Ok,3683
336
+ helm/benchmark/scenarios/arabic_exams_scenario.py,sha256=hv28A2pM66ejrO6oFOgmCx3JIP_nqwdUYvIsfGc0Kew,5359
337
+ helm/benchmark/scenarios/arabic_mmlu_scenario.py,sha256=xMRWPA16Wn8ONgAeyyHOB95X2SQca7tKUpUP8L5ZNJc,3018
338
+ helm/benchmark/scenarios/aratrust_scenario.py,sha256=ismiWLm1M6JmBgVZ0SoVglaOyFbAlyOHsSsiAv8Np8Y,3125
344
339
  helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=fOCHumFWZa4OJZcTZefJiJbdWsb3zjQnWLJYd10Cctw,2496
345
340
  helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
346
- helm/benchmark/scenarios/babi_qa_scenario.py,sha256=SyM6RP4v08B1PjumkdQnuKrM9L8SyK0bXbx-LgmyTPo,5067
347
- helm/benchmark/scenarios/banking77_scenario.py,sha256=dtiM-Q_pMDWqkLi-hgl0tH-aGuDdgHkXgweE1JqrPYs,1883
348
- helm/benchmark/scenarios/bbq_scenario.py,sha256=2A7MX6iMAZHuPpH9RePi9rVBeFRmGdiE6GlqZ5uNdAM,9603
341
+ helm/benchmark/scenarios/babi_qa_scenario.py,sha256=CAmh3GfFjB9Xsuh9K-PUu-2xIFTV0v0YNgWbSuv09Y0,5711
342
+ helm/benchmark/scenarios/banking77_scenario.py,sha256=cYM5Itksjy-tufjC9mmIKy_FG0kqPuDkDIhPhDZUX2I,2773
343
+ helm/benchmark/scenarios/bbq_scenario.py,sha256=VAlwXpAegpmvb3Zf-oMaBE7HGnfKVbhprCn2SE8ayKM,10355
344
+ helm/benchmark/scenarios/best_chatgpt_prompts.yaml,sha256=KZdXj4KUbkwFzgIEXVakMpZLTqJ7rldxNuXVDIdlk-A,31304
349
345
  helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
350
346
  helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
351
- helm/benchmark/scenarios/bird_sql_scenario.py,sha256=n5elzanKEX9YclAl2l1y33aCjihTmaw1VF_ZsAU5IaM,3613
347
+ helm/benchmark/scenarios/bird_sql_scenario.py,sha256=KoCcy4enWJzrwK1X405EWnK8E0kjpmcwhVFfBrKSRRQ,4235
352
348
  helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
353
349
  helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
354
- helm/benchmark/scenarios/bluex_scenario.py,sha256=eHAltiFqZ_bS0AVi0kbskTlxJbQXy7Sqj6E9nZPWqCc,2500
355
- helm/benchmark/scenarios/bold_scenario.py,sha256=iE9drB9IeXfRn3xvLnaQi3-nJAp-bV1RE0GJGnp9dJc,4130
356
- helm/benchmark/scenarios/boolq_scenario.py,sha256=wPETIu5jcI4jgP5GoFa_xi4SsvHtS9gxQ5TD8neHmdk,8037
350
+ helm/benchmark/scenarios/bluex_scenario.py,sha256=K4ob5_rd1hTOzlPJjuEvujcOdt_Ybgxj3jqj_BYjA9o,2599
351
+ helm/benchmark/scenarios/bold_scenario.py,sha256=MsXwUiJgZgFyVxh-E5gAagi4aPGicDe2C0xct5lQYwA,4882
352
+ helm/benchmark/scenarios/boolq_scenario.py,sha256=qQyJ0BdljChX9U_eEETdFyWLCSQvI0D4NrY6zOCXPh8,8824
357
353
  helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
358
354
  helm/benchmark/scenarios/casehold_scenario.py,sha256=QSe0D3KQJhlTOo6kM9OHwdKy6NlclsFGRVCAB3mTG7s,3174
359
- helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=BbEjDqa4C5wpdil5jIb1nzj16CCZ29hKoZVsfapSfho,4005
355
+ helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=PE4vbj0y39674UIIdH6mgUwSKe4wW_XqRrNsksrwQRs,5104
360
356
  helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
361
- helm/benchmark/scenarios/civil_comments_scenario.py,sha256=pnZU2U_cYFYOJmlmwTehHU5oLIPx_Yg8Ayxinroh4IQ,4875
362
- helm/benchmark/scenarios/clear_scenario.py,sha256=yGdPxWO6vY4JHNa4xywtvD-9lOn6s5cr3njpZyFA0D0,6183
363
- helm/benchmark/scenarios/cleva_scenario.py,sha256=n-h2urZ06GUOuAC_60HMwspTTpBFid72Fx8eZGQppdA,57988
364
- helm/benchmark/scenarios/code_scenario.py,sha256=lSbZWw67ie9osOjXDZukj3EEZGa3L6TrMvTg--IbuxE,12520
357
+ helm/benchmark/scenarios/civil_comments_scenario.py,sha256=N1ZmQyKXkRjRXKPTyEHOpbDhBkjcY8WyHPKMWaBl2qo,5481
358
+ helm/benchmark/scenarios/clear_scenario.py,sha256=cLFlcWKUT1Uy6bYDnAjf1ySR06mK16NhN1AtsaEBZs0,7226
359
+ helm/benchmark/scenarios/cleva_scenario.py,sha256=WQDiDCVo6bhtI926_p3uvr1WhIAkBU1gLNLA5viEwMw,78127
360
+ helm/benchmark/scenarios/code_scenario.py,sha256=tdki0m59NzN4YOm1pMfaSkUP5uUDeTNMqUAB84p5QGI,13953
365
361
  helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py,sha256=KC-5MQ-d8Nn46aDN4FaPxmd6yk1DtVUmVR-CIZsNCp4,1738
366
362
  helm/benchmark/scenarios/code_scenario_helper.py,sha256=TnXAlY-wdAFwIDylFItf0z7HOu93WD6dNThwzZYe330,5904
367
363
  helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py,sha256=PK4wtuBXs4cPPwOoGfhBA4J4cGLQYC_MvRWuvWrkrv8,9068
@@ -369,84 +365,88 @@ helm/benchmark/scenarios/codeinsights_correct_code_scenario.py,sha256=7BpcezugYH
369
365
  helm/benchmark/scenarios/codeinsights_edge_case_scenario.py,sha256=csTwe-mv1f6Tyvnj9uZ0SYuj1GRVvgjzukV28gIhNpk,8703
370
366
  helm/benchmark/scenarios/codeinsights_student_coding_scenario.py,sha256=wc5Fefn4jpCw03dQ6WswCztJ8AO5j0Vrn6omcOVUq2k,7409
371
367
  helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py,sha256=qX3yckZdMojYhiwvokvEuQpRXOzmN2zmzKjQb96Ljg8,9651
372
- helm/benchmark/scenarios/commonsense_scenario.py,sha256=yZ6n9aqOi7UWY3q4uTDNc2JRNZxaBZPIp7n_Snt_8g8,9511
373
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=gKEwqHDD8KlKmW8z3xAxSIGmALTXrRRPcoDUzbv_IXg,3854
374
- helm/benchmark/scenarios/copyright_scenario.py,sha256=FHzUYEabj-BTKl90fgq7jSCq5_Yf9cO9MA9djn50B1Q,3697
368
+ helm/benchmark/scenarios/commonsense_scenario.py,sha256=f1E94zQAArwha730LcdJFo75KrX50lqcaFRn9sq85Yw,10855
369
+ helm/benchmark/scenarios/compositional_instructions.yaml,sha256=mPsFzPU6uaAD0xghzv-QD5Wk4uhoLY2sF3Fw_lunAsI,1822
370
+ helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=sR3UzObloLUzgjNwTbSHLGGkeA0g9-Aq_utpBPT2u_4,4757
371
+ helm/benchmark/scenarios/copyright_scenario.py,sha256=GWRCJdLlnWZcz6ztB5XIASGMPNd2o8EZNR2GueP8xuc,5035
375
372
  helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
376
- helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=pfHAteKXLNUrhKyAYk6m9j-d7iuEgz58o15xukp_GFQ,10260
373
+ helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=FM6ty-JSFTDqdKLzfwgfhl3zV2oh_DWjRw4qI4-IrI0,11169
377
374
  helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
378
- helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=ZBfkUYlIa-BagRVBf97RoyLfEloAjnM0RPv5wmEWueQ,4406
379
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=vOUE5-rj_Wr6m7n76knte-kCMsphb-SSq9LraYf-Dh8,7933
380
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=9qo3l44aby1EfQqohh1M2DVtHXqY1fuvj1XT3_n4hBk,5588
381
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENOvcNz688P_f3Y7NzdwiWgoYTNvAaw3A,2866
382
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
383
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
384
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=zaXn4sRPUEZiqPoudiDT1xHMV2DaiEXOOTz3qB5q7Go,20143
385
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=NjutVTOVVze-IJniRFecz8gqh_BUpuJG3-BUboTGKRw,2933
386
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=EYKoXDWMesbY5dCNY-N0eYMRL0rjEfGsuS_TkeD3Suo,2952
375
+ helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=jnBIx4RPnTCGfWIcKNTTCyzBZWqUidWGTzTleOm958A,5020
376
+ helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=pZK3dbKKNfNOHvNaGMkN9pjFznu4raNyLe4fWkxNHSo,8604
377
+ helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=hBKRRYIHegOrhIo_i7-1RPtbxmuhXcg29DkUIep0x_o,6304
378
+ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=KzBz8nkrvPUTw5WmEoivtl0lLJ-mORek-IVKYmct2Pk,3460
379
+ helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=OvJ3pfxbxtJRxeSfeK-uoYFZ4ZIDSqE7ZbqZBuO93DE,14743
380
+ helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=zWhQWEE9Aa1O9ASLE5IAw55lzNLJ1ifGsBKZFk-jiXM,8942
381
+ helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=XO--1Rxb6kyLDRUQw-GhzLG-aTagVyN7ktWriAbBTAE,20756
382
+ helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=vIkAgy4LysSSIm553bnts3CEN6NVIDKr3xeGkZ2GNyk,3520
383
+ helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=5l0lRRNNJ8nAb1R4bMxq3lakMF-P3XFvVpnT1PrwMms,3556
387
384
  helm/benchmark/scenarios/dialogue_scenarios.py,sha256=yXCMZegzlgL0CXTY1W5lXdkFFHicUvq4z7_284MfRpw,5778
388
- helm/benchmark/scenarios/dischargeme_scenario.py,sha256=rBzagg0JVVN3o0VUfmHy2cN7gutV_RAJAo5Fa_El0GY,7842
389
- helm/benchmark/scenarios/disinformation_scenario.py,sha256=0T7LhXguzBP645Fruc2udfTaMuy7XGtOEMJKpFMIFRk,8565
390
- helm/benchmark/scenarios/dyck_language_scenario.py,sha256=hygFPTcICGUEPwjtxULLKBSbuBOXLYpozIgiGcT__W0,9379
385
+ helm/benchmark/scenarios/dischargeme_scenario.py,sha256=WTlqFnM76DFVGOUSLWv-g--vHWR71UWZ9VFXoEec3fo,9026
386
+ helm/benchmark/scenarios/disinformation_scenario.py,sha256=lq9Aj-DDpPJeFVk99wXEd2Qv3kahiBe9c8-RoBieCDM,9581
387
+ helm/benchmark/scenarios/dyck_language_scenario.py,sha256=HZEXetj5BkXrNJbAvg9HidrkxDgi2UUGIAVphNiN-jg,10052
391
388
  helm/benchmark/scenarios/echr_judgment_classification_scenario.py,sha256=IqODoUY1-zJD1KW4Qkg3VwJcUeeLgGUKThr62bW-wx8,4915
392
389
  helm/benchmark/scenarios/ehr_sql_scenario.py,sha256=Gm7Kw_TSUUxHW8ns-2e4E_tTBVX7h6Ta273VOpkMCQ8,5480
393
- helm/benchmark/scenarios/ehrshot_scenario.py,sha256=MWcTejCtwohBPbZYWei_WNZ-Hdnhml7ovTVbJAgUetU,67770
394
- helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=sxYXKvf-mGNqctTkemwI9rrA_Rg2xA8mz3_W3TIfzUE,2147
395
- helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4V426oOuexGg59q0djHCTQjQmqYgyLT191Z5fayubmU,6681
396
- helm/benchmark/scenarios/entity_matching_scenario.py,sha256=kzzDaoVikL2P7Z-17EkLIVR_W7IHcNVerUts2oXDKLA,7111
390
+ helm/benchmark/scenarios/ehrshot_scenario.py,sha256=OzZrgi-UZrMH70ZnHSeUWPCOesUue5vxPqnNOaN45dE,68830
391
+ helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=gceJqjxX-RxvOqPDANEwOrbHwKxtddpMz-FcsBfby0k,2854
392
+ helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=03Ju45Sju2r4A_Peq2EsOyg5Ik99lMUv-6X--ejB9fk,7332
393
+ helm/benchmark/scenarios/entity_matching_scenario.py,sha256=83F017FPFED_106IOawJN1jdY6IfREGJPNRvCokKGNk,7761
397
394
  helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
398
395
  helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
399
396
  helm/benchmark/scenarios/exams_multilingual_scenario.py,sha256=c9zMGGL8EbCeNogTm-88g_5wWUiX1Zr7z_tsyjUq2h0,5404
400
- helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
401
- helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
402
- helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=dMTfI9MRHKXnECsXOIY8xvX6w5vAPEIa6A7TYyIu2Fw,4457
403
- helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=-O4ilLwNcycmpQG5h_5WtQP7yJEr4mjWjKBe2eNP0uY,4806
404
- helm/benchmark/scenarios/gpqa_scenario.py,sha256=369E0JvaR12EcgcEFKKRcDw1iztt4sb8ghIsk9Brzi4,2884
397
+ helm/benchmark/scenarios/fin_qa_scenario.py,sha256=kwjdhO6_09csdZJ7KqMKXpnpOy6necDDZVOkiSW1lhY,6807
398
+ helm/benchmark/scenarios/financebench_scenario.py,sha256=_DompAP_3JzR6sGkvaBe_qubz7fS0BHB4wV0Gt8jGrQ,2900
399
+ helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=I7eoymZfxu4gky3YjyLnZgaFIJcMkprxQxiCLM7wJV8,5455
400
+ helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=Qw8OJzvp12716GRW5kIxxX--f92OFRcaP0oEy-gakjM,5674
401
+ helm/benchmark/scenarios/gpqa_scenario.py,sha256=MsMsBqgxz6jKt2-ys98XAslGWkxZgzpYOws0b9e4Uj8,3520
405
402
  helm/benchmark/scenarios/grammar.py,sha256=58tQYKPj013V9jIpW7fXUqZBLuboqEi_WLlDjx74spM,5590
406
- helm/benchmark/scenarios/grammar_scenario.py,sha256=Hz59gp5ivH3tIP5UAcHZbnk8pBX6GhIABSQlG33gIRI,1502
407
- helm/benchmark/scenarios/gsm_scenario.py,sha256=QIj0QK5ncF31ES0GUlxbdBk6SIiJJnj5wzamj0do0tQ,2674
408
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=8_ShEuOoEGu7iRE2b0tgi-cfBrCPF9k1L-Pgb__n3Bg,2005
409
- helm/benchmark/scenarios/harm_bench_scenario.py,sha256=CBo_AfbtHTlvJdsiquP0EDTKApVmDZc7EW0VTENNAfQ,2478
410
- helm/benchmark/scenarios/headqa_scenario.py,sha256=m6Kqt16JeqA1-OLJvmBPZzhVOVt7O6rbJGAwG9C7FZs,5658
403
+ helm/benchmark/scenarios/grammar_scenario.py,sha256=c3ATPkHM0WkA9QZEf2VNfThhuEUXD448uOuW6CAeVFw,2309
404
+ helm/benchmark/scenarios/gsm_scenario.py,sha256=dwIHWplfz0wCxD8BasRaIoCmG9cfMt3fRF3KhfhjyH0,3579
405
+ helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=hBh0ci-WXlAbhiPpsA96XEIkJPC9w_A2DAGRAHrIf9Q,2511
406
+ helm/benchmark/scenarios/harm_bench_scenario.py,sha256=ZFuVusNrbYxLwi57Dx2ACgLY3ydZySFB6PDwh8XP3XU,2949
407
+ helm/benchmark/scenarios/headqa_scenario.py,sha256=0hJewHkF9IKQfW6NUJ0DPjlwQmr7N90a2eSXrBQiFNA,6635
411
408
  helm/benchmark/scenarios/healthqa_br_scenario.py,sha256=YneXTfp8V6k8rYCF3BTX6bxN2ASxdG3qrBr7uH_IFWc,3406
412
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=iv1khpdiWW0Z7lshyWOhhjRfYFdAU6etN8X5EDEQCrc,1302
413
- helm/benchmark/scenarios/ice_scenario.py,sha256=NCbeqvpDFIIG7kSCrJrS-Z9S3iG2THZ7HpAqghpi_y4,16725
409
+ helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=5R9En7lTNirZCVsMNqNB2metw0dIEPa9usoFB9W11B4,1855
410
+ helm/benchmark/scenarios/ice_scenario.py,sha256=tEkXqRtvtXaoC6JfbJOcY0E8xWyYKGMOvsSYJGjM_9Q,17674
414
411
  helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
415
- helm/benchmark/scenarios/ifeval_scenario.py,sha256=SYn9itpFG0tlWSayf6v0P8bRgdtc-BmOV1dF-4TEm-0,1675
412
+ helm/benchmark/scenarios/ifeval_scenario.py,sha256=v2Q1uYCd5i1jO4_gcIlTrbZdPZ27tJrCXi9e0sqcm8s,2308
416
413
  helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
417
- helm/benchmark/scenarios/imdb_scenario.py,sha256=qHXd-QIXTCBq8rWW3N5I2Rvg6Pz9v1zFhZkwc73w9io,6259
414
+ helm/benchmark/scenarios/imdb_scenario.py,sha256=H9iHmKK-q4a5edSMcS166f1fjkNbOS5BEIgR3md3k7M,6887
418
415
  helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
419
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=JRTLaQc3PDpYeX9ewGnBteT9jXeaGbmJ1VzYGT8TsXI,3067
416
+ helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py,sha256=L_ii0n5vWoLGkwrBcqaAyaaigX-7y6Quu6piXflX8EI,3979
420
417
  helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py,sha256=5fJHFonb7Ko7exHFtoUtvHar_7PhK2HjW9uDlU8Ljj0,2872
421
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=6z3VlcucrwK2B30artWiSpo-mOTr9tiwYV6Fu8XD0VY,2657
418
+ helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py,sha256=qs3ID1f9Nobba2Mv3DxXzVVJmSU6RxtZW-DMJEAbkRc,3427
422
419
  helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
423
- helm/benchmark/scenarios/koala_scenario.py,sha256=A5M6SD7Jjg7r9QlbHCtMaydBe-wpOtB6oc6gFXuZ47o,1389
424
- helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=23rZM3IA-phf2VnuPY9QWd64scE6eaJks49apDUNfic,6355
425
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=xjw3iKRf8P50Wo58n7ssnFiWHR2QFehzHlZhh9P1XKs,5374
420
+ helm/benchmark/scenarios/koala_scenario.py,sha256=h-dTHQrNVoi7p7sTXZDqWcpjlznfUgxNrgr4nW8Hrk0,2212
421
+ helm/benchmark/scenarios/kpi_edgar_scenario.py,sha256=DE8efUmcPW5R62tZ46Rdsjv-EQs4lXm403O5XxM9heQ,7303
422
+ helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=JTm4Zkwqed7PijdeHzSbQ2l4YZY037OYF_fbnKmlpKg,6185
426
423
  helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
427
- helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=BFK524H7uLfz_ZURuRS7KrhzRCP-WyhIcOgdcBrsldA,8709
428
- helm/benchmark/scenarios/legal_support_scenario.py,sha256=Ty93M8yTr_VEHomJ_36htqqBDZZKWI7PHtXA4qkSJGk,3990
429
- helm/benchmark/scenarios/legalbench_scenario.py,sha256=Yfyouxb4ir16zlBea77Xzjc0BlcYPkiXoBoVI38FXwM,4779
430
- helm/benchmark/scenarios/lex_glue_scenario.py,sha256=-3fsSjTXjgRN96Hl4GzDIMB_dlxSR9NR0ATUb-CiU3w,10357
431
- helm/benchmark/scenarios/lextreme_scenario.py,sha256=gVTHoMYX6Q_Itt5rOVO5lYmqWfAtuuf63CnKAF8b_ak,20461
424
+ helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=0DraJdQebbl8tv7S3WmLos98wnQFGJOzY6suGPoxR40,10954
425
+ helm/benchmark/scenarios/legal_support_scenario.py,sha256=cM98PnIAfjQzciUYGtgHqHYnWIdbdJfh3uy6uEIo488,4567
426
+ helm/benchmark/scenarios/legalbench_scenario.py,sha256=TaFwrzJzhPrnHrOV3GF7PYETRR5ywmMmn7oOJtZokZU,5604
427
+ helm/benchmark/scenarios/lex_glue_scenario.py,sha256=H7f3F7gK7bgf6FXvqXGTQrecTE6RtZaitIKmwQLksck,10736
428
+ helm/benchmark/scenarios/lextreme_scenario.py,sha256=dR5UUIymth3J3RInoNybygZg0rNZ-8wwzVHneuTTOGE,20843
432
429
  helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
433
430
  helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNySpna7aTY7K7RPD2x4,9109
434
431
  helm/benchmark/scenarios/lmkt_scenarios.py,sha256=K51CdOZqMOMOozUmADjrJuNCpUtXVEZwcOeIY-EZrwM,11162
435
- helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=2VUJ36vHUZp6fZuLfRIuPSsU_K6Z3Im2ums06sZENqo,6153
436
- helm/benchmark/scenarios/math_scenario.py,sha256=tW-nGKxyDOwOo2siqu1ZzPrCGzw_lFYGK5uiUK0lF7A,14525
432
+ helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=ZtheFEcsBMSqGIPw5UPOO_b3v93mPFar1yqxVnsLq4E,6785
433
+ helm/benchmark/scenarios/madinah_qa_scenario.py,sha256=W7YEQTHyNWUJD8sKFmXU9e-ubzvleWQs7Cj_1zdq2bk,2482
434
+ helm/benchmark/scenarios/math_scenario.py,sha256=4BBhEvgfqPDrXxxW-4x4I0v3lWjscoLCf9vCURXs7hA,16043
435
+ helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py,sha256=Gtc9DgV2bLPIDngROmizTWQHbTftnwVodi9CYT0_P2A,2146
437
436
  helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
438
- helm/benchmark/scenarios/med_dialog_scenario.py,sha256=AE10W1UWhOrgKUnz7e2brKSaQR1WJkQUcPoo4s6n0Fs,7553
439
- helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=XEipvuIA-QoyZrtlm8nnaPuyZzdDaeTskAhnseD3Q68,5096
437
+ helm/benchmark/scenarios/med_dialog_scenario.py,sha256=MKDlZLJEUq1nDRzlkHlpTWOxHwgghWMXcQvHJcM2LP0,8615
438
+ helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=tvF6d6e4WQi_mUIlZoLQvbOpVIfHR4nyMVVR8z4AkAE,5752
440
439
  helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
441
- helm/benchmark/scenarios/med_qa_scenario.py,sha256=m0W-FgFi58psLglZyQy_ouMQIDP-2j3aL7uInkdVtms,4478
442
- helm/benchmark/scenarios/medalign_scenario.py,sha256=mhd8REXpPwxftH48-KKb0ZURJ1mdOlvPRmvN4g4M9Ho,3383
440
+ helm/benchmark/scenarios/med_qa_scenario.py,sha256=w7xTavAi7v-xmQXpxXM3Z071qR-eVbj_0yxwILAcLHk,5294
441
+ helm/benchmark/scenarios/medalign_scenario.py,sha256=5ALak5Hq2XQbqwTF3fQYKg-QPtL_vjY7J1UsMm9SOFk,4481
443
442
  helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=fKXJFVLGnLcZKRBLsbjJA6YA4WqMaQAjkEU-i6YzSTQ,11626
444
- helm/benchmark/scenarios/medbullets_scenario.py,sha256=8O0UsPWw-ESkrgiuWz4f8gR99jH5-wS5HtCKYwZ1ycs,6713
445
- helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=vwmEQZ119tOVeZtl6Zt-nXKwkA8Qt4WRiH2HogIkV0w,5560
446
- helm/benchmark/scenarios/medec_scenario.py,sha256=Lo7iVkek7C9omJ5LX-C83pA_Q5OrAfdNhJY4rslJWTQ,5270
447
- helm/benchmark/scenarios/medhallu_scenario.py,sha256=d4HlEi1cQtvh1a39jvIHezDDmjuIEsSPdqDLLkDTzw4,2544
448
- helm/benchmark/scenarios/medi_qa_scenario.py,sha256=FmXI3UwfbL8zinFPtSyTyw4X5VIe2d32HAg93vbXR94,4118
449
- helm/benchmark/scenarios/medication_qa_scenario.py,sha256=StQmfHTYi8pZLP9FMPzyS-VB9gilZS0XBme7MzAL2QA,2583
443
+ helm/benchmark/scenarios/medbullets_scenario.py,sha256=oMqnF3Ri9dghEWpGQYzfcTnYGMK5b2cJNVpJoqdtdUo,7694
444
+ helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=EDeeBKmbosUaMo3dg2MNVs_Cb_ws6WfnBYk15_B3lkU,6608
445
+ helm/benchmark/scenarios/medec_scenario.py,sha256=sLx6tcFXcvhDIThGNVi-425znECAn5pkUgRk83CM-Q8,6343
446
+ helm/benchmark/scenarios/medhallu_scenario.py,sha256=0EgeIxGuYMyBzM8xIOF4WcxfCOVqCp-oOuZe4Ai-CRM,3660
447
+ helm/benchmark/scenarios/medhelm_configurable_scenario.py,sha256=vxvvAaIFW4cWaMez1xbEOZBh6S2wEH6Ws8KcGpnaZbs,3852
448
+ helm/benchmark/scenarios/medi_qa_scenario.py,sha256=iv4_GUZJ9mGS7JGOMaPL747ujjrvnmeFjg1LbCpeMLo,5210
449
+ helm/benchmark/scenarios/medication_qa_scenario.py,sha256=uyYxtCm_dX9Jt6X-3ha2gAUyxF55wKn3_k95g7VAzHQ,3636
450
450
  helm/benchmark/scenarios/melt_ir_scenario.py,sha256=d88DEGKVJZCeGnbrXrQZO_W4VJeqW8XNaYc8wIUiJtA,5978
451
451
  helm/benchmark/scenarios/melt_knowledge_scenario.py,sha256=FDG4OGYEV6Ac40VC7KAeikzbFKAK2XXFhH1-QUTw8jo,7923
452
452
  helm/benchmark/scenarios/melt_lm_scenarios.py,sha256=kSm0lRRixhnXctMprPnzi09PLOmgfs-C7TAW3QI8RmE,8969
@@ -454,60 +454,63 @@ helm/benchmark/scenarios/melt_scenarios.py,sha256=_WShDpmPaKrujGbZcazCqleDn0TKDh
454
454
  helm/benchmark/scenarios/melt_srn_scenario.py,sha256=EQSOZIXbfvVWCJMJ4H2e_CiBz6wc8THJndnbK2WwTHM,14674
455
455
  helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py,sha256=ptMQWgNn6R-XpAVAAjutSdZg_9ZUqG6fVotzAgeead4,7945
456
456
  helm/benchmark/scenarios/melt_translation_scenario.py,sha256=j9YrY60DQHZz4m1MJZaGLzyI6FERlHRx2wy9auyAVB8,5415
457
- helm/benchmark/scenarios/mental_health_scenario.py,sha256=O1Lfd0MxqawLZLKUDSynaqqbaGHRjDglmePIqepnJI4,4961
458
- helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=GSUlvK-NVPYB83emucc1cPj-HgAQVu2aXGuutfXJUHc,4098
459
- helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=bxwVWjE_z4I_Nk5eD78g3QAGyjpsNg7DVWpkp8IGWXM,3841
460
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=tZBUZEaUMZvfSlsU6hcPs-pxQ0kDIL6qebGd7JmpDbk,2699
457
+ helm/benchmark/scenarios/mental_health_scenario.py,sha256=dwirS093vIdS1VG5yKqUw863TJoCF_keO-pr7ysTIxA,6066
458
+ helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=OIDB-f8wyn0ApsPqwpP11yJEpEtSpf3aYc6VVap6Jr8,5275
459
+ helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=pG_NK1Et0QZosQAOLAxbciyNSq_wIdOT7hkXsBb4mTg,4902
460
+ helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=KRl1lYX-ITWTGxWS_NNQ0o3I4E__jlzNDhAYvI1by7g,3749
461
461
  helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
462
- helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=pwpp0wqNhsGc8v2V11aUyEWbwdkmIm-42N676j1T3Ws,4031
463
- helm/benchmark/scenarios/mmlu_scenario.py,sha256=_5cX2uI7CxD7K_GvO3MD8CRJLuN4EzS2o_EFvbrfjSU,3855
462
+ helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=2FVL-6Umn0BufFpJ0e405q1ZgeeP8Np1kCvsE61GaOE,4686
463
+ helm/benchmark/scenarios/mmlu_scenario.py,sha256=uHJny3NXaqqUfBav30T7ip0FJJ1hxqcUk8spEpUq818,4772
464
464
  helm/benchmark/scenarios/mmmlu_scenario.py,sha256=CyOISLOsXF9IEYGfeqWyYYkWGvrUvGivlWSJ5ttN9qY,2762
465
- helm/benchmark/scenarios/msmarco_scenario.py,sha256=-l7_rIMQjMWcpTyn6dGotmNJ5XxN_Ze8dEJyv5ftWFA,34050
466
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=13pXjs9lFduM-QL03mpM10hU0iA8Vr2jJG2FVBQdKOI,5577
467
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=RlyWrlI9e5MLsGbkQWpO2WRsIOZJi39xHskOIBypHdo,5399
468
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=-Et7hJnQJOGl1U9Xdb5mLckYTpU_Ve1sCe450M-5haw,13513
469
- helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=MiSq0UnUllJxHFU2gO7m4vr_vmulavJxc4ruZhsAt2U,5632
470
- helm/benchmark/scenarios/natural_qa_scenario.py,sha256=g-fP8L1lXs7zwNVQOc0ZUnbYkCyElQtLVt5fe5dtvSE,12564
465
+ helm/benchmark/scenarios/msmarco_scenario.py,sha256=p9YNL5oTa9isCGVvmqHHVofKmiwitjPQd28ElXmRAN4,35601
466
+ helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=gtVSZxrs321tOolyD0gOoLzc0--uTc--3_HdlBVIuHo,6607
467
+ helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=FIdI509nn0LN9opC4yJ8UsvWmh6-KECUMZF88duIEq0,6395
468
+ helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=o7RydazvQkYK90epvuXsdEyE02fmpsDEwS6253fNptk,14365
469
+ helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=I4hjn0czmygPLB3tE-VTgCHWC28PaB6BdxL6eSBLL_I,6431
470
+ helm/benchmark/scenarios/natural_qa_scenario.py,sha256=3wkXvYm7m0Isxv2EW6SIuIEwZEV2lihsSLQZaANsKZo,14017
471
471
  helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
472
472
  helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
473
- helm/benchmark/scenarios/omni_math_scenario.py,sha256=5qb2cO-Ibb3kDbwYvkzsoU_aOsoKV3ROLgZbi83OyGU,1955
474
- helm/benchmark/scenarios/open_assistant_scenario.py,sha256=zd8T6eLOlYMZiFyKrRjc-EPwk5_KpbBedAcKDbZ-TdI,5609
475
- helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=XbO8Wpjjq2e8OsC2s_ZScV4TcZg3hlpVGy56hgxXY9w,3253
473
+ helm/benchmark/scenarios/omni_math_scenario.py,sha256=nB2miRRQ-cWwhpqUkypOZibYugD56wZ299nxE5bty9Q,2582
474
+ helm/benchmark/scenarios/open_assistant_scenario.py,sha256=Z9eyaaHGRtFZTogIkOe1Pr6d70lqSe80tMsNPWR_jog,6577
475
+ helm/benchmark/scenarios/openai_mrcr_scenario.py,sha256=MTzTZVGN-5c6wASKIK5kBFiGywjvAzRR2rjSbgiELhw,4064
476
476
  helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
477
- helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=73D9D_q4Zw904qfd3tVPPhHxpGN4IZcWjlA6ZHEfp2s,8070
478
- helm/benchmark/scenarios/quac_scenario.py,sha256=RpJpOPbvhB0jv3R91Odc20LcNyZsny9J4IF24GNEygQ,6689
479
- helm/benchmark/scenarios/race_based_med_scenario.py,sha256=vZB43jtM47PWrl9L4HYOf1i7orpscKcHX01m0oVmk2g,5778
480
- helm/benchmark/scenarios/raft_scenario.py,sha256=Yk56dUMqDGXpp6SxoGWhyxa4lAIniSQfivjkoPqMuFA,4644
481
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=zpQthgDi-AyEgOUFO5F0qaWCctLEI5WGHBEGlPEVpqc,2424
477
+ helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=Z8gR19kiTIugBTvBj6g9LiBXicfAxZ1AFh_GF_axgQc,9043
478
+ helm/benchmark/scenarios/quac_scenario.py,sha256=y5bm1LXHIICqPIkWOg3sibnH_sC15b2zYUfT-_Y0V4E,7349
479
+ helm/benchmark/scenarios/race_based_med_scenario.py,sha256=pyeOUjWlQ30WgNr48BuV7kP7fhqZljLfizbTfWjyV_k,6862
480
+ helm/benchmark/scenarios/raft_scenario.py,sha256=BQ-faIiWBuUYmHTMCRbI8XpymtWvKK8DN6oNejjNi7M,5443
481
+ helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=USsjBVzoL-Bgq8B2clQvl3d-g4XlOlt8gvBje9VD7Dk,3077
482
482
  helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=jgVf1D4eTSxwxQsW0GBou5hfSo2dnlEJvHpVJqk3BxM,6327
483
- helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=Dy0INRMzxSiIs9Pm3fa0hYodN-W--WPSv4kcmeQhucM,3270
484
- helm/benchmark/scenarios/scenario.py,sha256=kSy7tmtFeC6-QSEsBuvlrMTA1PB6fOY9jycMld-vBVM,8592
485
- helm/benchmark/scenarios/seahelm_scenario.py,sha256=i8SnuYDQtFGFkm686h3_FF9J3vr-Bd9w_jd7h5tV5yY,78152
486
- helm/benchmark/scenarios/self_instruct_scenario.py,sha256=3Kvi3pLL6eGOEezjoQoGv9c1UxKiRVlFmILKzqst4pI,2309
487
- helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=wF_sD61IZ4RDznBVQ1HYbGh3Vc2qjbcBuU0jdmp1aD8,2803
488
- helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=5aVEiRgFCutEWW9yMcJBxEo11FlwW0SiZTaOyXY6ioc,2693
489
- helm/benchmark/scenarios/shc_conf_scenario.py,sha256=3LDB2pT6yi-ubSooGAD_0Ao7sYLo_MMAHNfm5Ux9Yvk,2889
490
- helm/benchmark/scenarios/shc_ent_scenario.py,sha256=PS_O_keZ5s5_nSKxAC1k_WV2W8umEbyyKmlFtxvaReI,2855
491
- helm/benchmark/scenarios/shc_gip_scenario.py,sha256=cxMpMmS05QpZ4xW2eogPH1hcDv6GzA6UQoAi9OSFO_Q,2702
492
- helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=dbQI_pDqXepV6EyxMUNumIpyQ8oDwnu37qyQ29rxZfY,2998
493
- helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=edepzg5qrN_GKa7u1W0RRhkpmfUi2vFHCvI1ma205WQ,2908
494
- helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=QOQdz21s_YaRyGz-ciCPHH-fCy6hiGIrHUZz0SWPm5o,3391
483
+ helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=sUJs9eocWUrnBBOEFK4pUq4KgozL-QTra67zkcTHn1s,5048
484
+ helm/benchmark/scenarios/scenario.py,sha256=6zYT0heGPh1HXmv9l2g360Y3CwcV4xjA6jUq5snNF5I,9482
485
+ helm/benchmark/scenarios/seahelm_scenario.py,sha256=Pgw05ZT9NByV7GL0031vGImbhGOZPrHv8aOR5DmP7sA,94098
486
+ helm/benchmark/scenarios/self_instruct_scenario.py,sha256=uPVclF96zh0P2VJ163nLa7XuTKlMKGaTDFN-6IcdbXQ,3164
487
+ helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=kLnoSmpNaPKUcHDPhS6sDP38TC0YII5dlvEKpiUZYKY,3787
488
+ helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=Fg6PKKLLtmVxuu8pTOAmmoRpPIlFhxWl4VzIUNr7w6Y,3519
489
+ helm/benchmark/scenarios/shc_conf_scenario.py,sha256=605KB8lTHlJh44XwbkilKXXAfJQGD2XVnZJmFoaV4Vw,3948
490
+ helm/benchmark/scenarios/shc_ent_scenario.py,sha256=Sr4E3z0keK69b0DIZ1QFISvG0TsEQ6S567h84eSEHcc,3737
491
+ helm/benchmark/scenarios/shc_gip_scenario.py,sha256=MhQ4mdKMJOtcZJ0gKxoVCg2RVyM8OKfjW_EA3wna_2c,3564
492
+ helm/benchmark/scenarios/shc_privacy_scenario.py,sha256=OTYdD5mifaEZeI84RF5fz3Q10M8cE74H0GR3a7QisAE,3974
493
+ helm/benchmark/scenarios/shc_proxy_scenario.py,sha256=bM_qSCv5Qp_03TiDezgl1gUSSs49IZ_M1L4xZnMzToc,3915
494
+ helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=BttMbH39uai4qg621W0ySAFX-UtoRLuyEi-f4bfSrFo,4461
495
495
  helm/benchmark/scenarios/shc_sei_scenario.py,sha256=pTcb7n97VkesyRuqUqe5JGed1jDsQEd19udciDras8E,4532
496
- helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=vjDyRZXP9UjkQzmA6u7SmKtMBuUwwn6KRQ4rT3vZqqc,2796
497
- helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=sjIHT5NZlHv_IcXr_15-pOiBUPKKwykyH-QpMfvrHAY,1247
496
+ helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=IPOuJ74AIWOLDVIQ5lNUjMswcU9zeB_gOXg-K9HLTO4,3703
497
+ helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=IjBVVLUG4muHvU_wd-12ML-YZqN4Qe39TLwb7hiYT4c,1743
498
498
  helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
499
- helm/benchmark/scenarios/spider_scenario.py,sha256=mhiV3XWGwpnIQkaHFM_rvZlrwE7nqS12-F9t1eB8kdI,3306
500
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=zdokiMy2Lrg5mS3V2QEakcZyJxIkqcoT5CqVCAtyoKU,4146
501
- helm/benchmark/scenarios/summarization_scenario.py,sha256=WZnqhMQED6UBmRjHSboygdenLecOqIhvgdYVXzy6Q-I,6912
499
+ helm/benchmark/scenarios/situation_prompts.yaml,sha256=nJA3X_I67PIpXgd7LTekWwEr5zn1ryqIHgvqCpAwoGQ,1790
500
+ helm/benchmark/scenarios/spider_scenario.py,sha256=qN1TTLfJmsOdRwf6a-sL9cMzXmJsu09nQPvSqn9L0hk,3932
501
+ helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=ZiXGXeKelEm9NrFsHQS5ft1L4oL6a_IlAJm_flRv-Z4,5228
502
+ helm/benchmark/scenarios/summarization_scenario.py,sha256=wry6hAO_YXk56gS79jJ6HP6VhrRjpExvEZSsl2vM910,8883
502
503
  helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
503
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=OaxEvT1H9VjOjBSw_yKs3dcYt33vFE_UARr-UIP9pBY,3120
504
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=pt2Aln_dX1YMSl-9hV1HJmwW90MC3fWwGsMxZg-Q-UY,16391
504
+ helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=HbCeVUzPm3miSZoIDivTcAkP-fwi6X4TnyaAx0jUumk,3737
505
+ helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=Ir8CVC0aD7Cy7H_ZKGMd1c0iLK-dWbkuMuUl2D7kcR4,17048
505
506
  helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=7STCSHiHGIQ2aaN_PwDE5jXUJ-qcu8PaS4pC-pbOceE,8410
506
507
  helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
507
508
  helm/benchmark/scenarios/test_alghafa_scenario.py,sha256=ARQyzjmEpX_FpN2QLnIB7P-ToAeMtE4dqsolzlq8KPQ,1696
508
- helm/benchmark/scenarios/test_aratrust_scenario.py,sha256=3rsIBfFCAmemNT_IJQ6RZ5liwrHseKGvtGmm2VHoBo0,904
509
+ helm/benchmark/scenarios/test_alrage_scenario.py,sha256=9ofFc05Sy1mdfU9VgHdL_SNp8olJ4ComnZllkMU6itU,6711
510
+ helm/benchmark/scenarios/test_arabic_exams_scenario.py,sha256=nD221WpNE3Ddy-VOdLQGYOHiYVBAcyJxeMc__lVNRLo,985
511
+ helm/benchmark/scenarios/test_aratrust_scenario.py,sha256=6Ks4DA13gU4BAP46qKwPISkqIJw-RiZt4ZhyviXdrUY,918
509
512
  helm/benchmark/scenarios/test_bigcodebench_scenario.py,sha256=q9FWJsxLJoFaB3PSMLjI_-YyPoZYusOsMPwn6X6NKXw,1304
510
- helm/benchmark/scenarios/test_bluex_scenario.py,sha256=fLTyMTmSiJ8MoJKYJ2pcE39yXwZm6zv3_oWsQbRbwH0,1930
513
+ helm/benchmark/scenarios/test_bluex_scenario.py,sha256=QCIqq0GRRrjb55lwLpBiEkDwSFzEZxBKbCQHvyYO_Fk,1928
511
514
  helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
512
515
  helm/benchmark/scenarios/test_czech_bank_qa_scenario.py,sha256=bZNLEGu58iHmutGlSp-2uVC2931TO6Rxw7giqFh9RHY,828
513
516
  helm/benchmark/scenarios/test_enem_challenge_scenario.py,sha256=XfPkYaSwdGa63ToC_BLuVKTRSldWNBlKsZYK6CFzL3w,2000
@@ -537,19 +540,19 @@ helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsH
537
540
  helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-FVN7Z4nGH8ZLiFfh_F9OPXZjw,789
538
541
  helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
539
542
  helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
540
- helm/benchmark/scenarios/thai_exam_scenario.py,sha256=YjFsom1yiu-xBZ3SGenNuczVCwQcmyoITTMavGv-QEk,6069
541
- helm/benchmark/scenarios/the_pile_scenario.py,sha256=X3GWABiJ5cSoZzeNpgNUVAz7_A9SyM5MhgpJseKpZow,5019
542
- helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=kUQ-Bpu1N1s525EP3pa7v3sp9Wybl0RuJv2pVu0pAGQ,6155
543
+ helm/benchmark/scenarios/thai_exam_scenario.py,sha256=7FALls8tnT5QxC1TT8A0Mce9kmRT-icsQ7SPU4oqWPs,10461
544
+ helm/benchmark/scenarios/the_pile_scenario.py,sha256=Dz51JxxazqPiX_fk6viOav8hQ2n6Iw0LIPhouquu6aw,5632
545
+ helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=0U7q8E9XB0H9oSN3OzhfsiZ-8PJrYXCCC04dffjicB8,6822
543
546
  helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
544
- helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=wnP-zH38J62zmbdeOLzdU-E3iclbQPApgEk4AGyhdoo,2120
547
+ helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=ydG8MvBF3v6TXHScMK0_-HPAhmPhMWh5G4foBEHDp84,2905
545
548
  helm/benchmark/scenarios/unitxt_scenario.py,sha256=uL8Gni-Uw_eIp9xKQefp4J7XtKSttjJHzJE4USyoC2U,1930
546
549
  helm/benchmark/scenarios/verifiability_judgment_scenario.py,sha256=2iCJplnxdR7NNKjhsLR5o51pL55Q0bcbjjWlvrk5lw4,6067
547
- helm/benchmark/scenarios/vicuna_scenario.py,sha256=RFLUXx4zTfVPl5nT5j_DZ9TuHzk216PQcktomXqqR50,1685
548
- helm/benchmark/scenarios/wikifact_scenario.py,sha256=tTIHk7-xEsi-CGTobcEdbsjVrtAXTZOeWXRVj0hOeWA,5856
550
+ helm/benchmark/scenarios/vicuna_scenario.py,sha256=OWwbFkhgEMHd5YH2G3v2E_p22DmYmPVsDbKKhBbyTDY,2478
551
+ helm/benchmark/scenarios/wikifact_scenario.py,sha256=AHHZz_trtGf8HRoCnE6vukqrTD_Of9XQ1GcrqyctgR0,6702
549
552
  helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
550
- helm/benchmark/scenarios/wildbench_scenario.py,sha256=Qd9b1SC9ZtY1spf1vVuuFXXVxSJ0FlmR_DP7mIvAs8I,2981
553
+ helm/benchmark/scenarios/wildbench_scenario.py,sha256=dWJSqF06ZWAyZhaejNmrZ0Uu4Vlh5HMdTaMLNkMfe8Q,3668
551
554
  helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
552
- helm/benchmark/scenarios/wmt_14_scenario.py,sha256=1YYjz4x2RbYfJAXBTux9X30dxYTSC-YNngCCLhEiNfI,4646
555
+ helm/benchmark/scenarios/wmt_14_scenario.py,sha256=7V2AFfd_LlbYTyi-tLXi5YvE8b1zrTxQJ6Z6e1xONso,5401
553
556
  helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
554
557
  helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
555
558
  helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=NtTEHzmbeCicbjTRxPBUueZrBGOPwF6RVc2Yftc-VKs,5634
@@ -573,11 +576,11 @@ helm/benchmark/scenarios/audio_language/mustard_scenario.py,sha256=7YHgfSpua5OdE
573
576
  helm/benchmark/scenarios/audio_language/mutox_scenario.py,sha256=bDCQbhsRDR6iQGNlCu_35kjmjGjuzjOIoraSncfOlOY,10277
574
577
  helm/benchmark/scenarios/audio_language/parade_scenario.py,sha256=UuOa5cSrHh5n3VF_SuJp4cy1MxlI3uEKHLrNEhGuyuw,4186
575
578
  helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py,sha256=oN4vBkElVzjccaEK2JFqoXMCGFTTHD0gcYwSDhvHTpQ,5438
576
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py,sha256=2qzPYfn0YYzzOtffD50kQu_ePpFJj_sSW7Bq8ZS6M2g,3559
577
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py,sha256=TEyfAsas3ihN4b4bpGkbK_M_uDt39fVrL5k8vl2Cdyw,3389
578
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=qPOP6eIEwxPKu6q5EzcrRmhMxMUQk5F9iq8zdJ1Ccrc,4819
579
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256=CGteDFCd31vbu_eg5oal1cnfjQ2J0Ty3C2HYyBLhI5M,4186
580
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=sL93Q2ERzYiWcTOFEyvjUNbX0BgPdsyHKt6eTr51-Kc,5177
579
+ helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py,sha256=Xw256FUD3mNZRtrnR1N9q5oSbHwGCP9KzLlcNjb5vn4,2740
580
+ helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py,sha256=MrjlgmeVFhdmvVZclFmOGK0wZDQBFK5E2wBG8bVyj-c,2565
581
+ helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py,sha256=xBrqLDVU-94NNRsByLa8BovFc7fblWa3BO3eftcw-TU,3603
582
+ helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py,sha256=-3ZKJVoNRLpnooI9Nl_cMH250r7Pg0hxtgcjYN4LbSE,3993
583
+ helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py,sha256=2w1RuNMO2I9zhphO0LI5wgYVH9K7gbos_DeBilucakM,3960
581
584
  helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py,sha256=wkKyTCtx4isQSMufap_6DsNdGkHi7L8FQ2p7n58kKYI,3124
582
585
  helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py,sha256=4M_gTWs4CoJ1Ce9dDFBTAe9dzSovpsve_sN1eco2V2A,3155
583
586
  helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py,sha256=L04ee5bM5E0UNNmkwEzVwug4HJXQoIcVjujPgxtU2h0,4366
@@ -600,6 +603,8 @@ helm/benchmark/scenarios/image_generation/radiology_scenario.py,sha256=7JN8OYap8
600
603
  helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,sha256=DoabanZhd-2MHFDZeR9EoPit0T2TvbVwZGUR0RfJyW0,2362
601
604
  helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
602
605
  helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
606
+ helm/benchmark/scenarios/medhelm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
607
+ helm/benchmark/scenarios/medhelm/judges.yaml,sha256=g_O6zVgOMSL4_f1yNz8muDuUUBzcsM8e5gpfe56eI4Y,663
603
608
  helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
604
609
  helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
605
610
  helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
@@ -644,7 +649,7 @@ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,s
644
649
  helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
645
650
  helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
646
651
  helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
647
- helm/benchmark/static/schema_arabic.yaml,sha256=PoudK_u7hV2lalGRvYDI5b89tSfch6Dx_bn5681Um_c,7688
652
+ helm/benchmark/static/schema_arabic.yaml,sha256=Iui-4_M4tV45Xzs3bz0diI3UZwVAuaLAxD5uNhjurgs,8925
648
653
  helm/benchmark/static/schema_audio.yaml,sha256=lVslZX7JmFo0ZgLU4n6amrs9DK8y43Ux0I9QyDUG-14,29119
649
654
  helm/benchmark/static/schema_autobencher.yaml,sha256=yb-NkF5w5R2YOg7RIsadNHJ_5G7lG1gbcDVq_25luEk,5716
650
655
  helm/benchmark/static/schema_call_center.yaml,sha256=i30aFzWqdOJRyAHN8vAzyHEX1v95DEK0TI1SMKTN4TE,9106
@@ -662,13 +667,14 @@ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0
662
667
  helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
663
668
  helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
664
669
  helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
665
- helm/benchmark/static/schema_long_context.yaml,sha256=3YjlNkQBgp4hS4PE1EjZvjpvX9v4QjaBPALtOYLpPCs,11486
666
- helm/benchmark/static/schema_medhelm.yaml,sha256=84BrIengbq0m42ICWvyEWoYtdERR-8J8-8QbPOqUzvA,50747
670
+ helm/benchmark/static/schema_long_context.yaml,sha256=NH7poEOCpmbqOZvbHeWrnSB7MWavh7EX2NU-Yl-nXNg,10829
671
+ helm/benchmark/static/schema_medhelm.yaml,sha256=e3vVHdEXcS-joOUMUoIoFA3x9hEE__svDoajbjfqpLE,51793
667
672
  helm/benchmark/static/schema_melt.yaml,sha256=mmPqwDa26DVZXsRJkmKQSyD0OStvjlxaMoSPM25SpD4,47494
668
673
  helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
669
674
  helm/benchmark/static/schema_mmlu_winogrande_afr.yaml,sha256=YIVYf-mOFPq82UVBdMhnCWNOr4sV8Oi3-ozOszJ2tWQ,40143
670
675
  helm/benchmark/static/schema_safety.yaml,sha256=7RfZDX4wr8Xr1BJ149ZwmplPzPkNL0-BKbEZuzUsl_0,9278
671
676
  helm/benchmark/static/schema_seahelm.yaml,sha256=9XF9Rlr7I-g-uW6R0LNh7Xg52Xs3_058QybXEiN-hnM,28296
677
+ helm/benchmark/static/schema_slp.yaml,sha256=5AV2leKoSBZwP3rIfXcwiqqpXPQbyWjXKE5kU73IAt4,7122
672
678
  helm/benchmark/static/schema_slphelm.yaml,sha256=3avOfp-ZEmVRGei3_M_WX6cSP5hQjbfHsDr1XrjayMY,5294
673
679
  helm/benchmark/static/schema_social_audio.yaml,sha256=Nj3ORXDT4RHD52cyo1RHfueWwbhqp1qW06TaVJ2lUfE,8653
674
680
  helm/benchmark/static/schema_sql.yaml,sha256=8rRff6p_i1CsH7oDbUjau2qRWbLGspuM1Hy-g5pOQiU,6047
@@ -680,25 +686,26 @@ helm/benchmark/static/schema_vhelm.yaml,sha256=0slYep2eepUefgtK_m4iSS785sHdJzljm
680
686
  helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
681
687
  helm/benchmark/static/schema_video.yaml,sha256=FkpI5Slc4w-ty4hns82ArXIvTdqppWDnkJSpIp74QN4,9713
682
688
  helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
683
- helm/benchmark/static_build/index.html,sha256=hlkvPO8WVcvIJXentHj3Kn5Cd3QwOoi7OqRAou0pRVQ,1178
684
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
685
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
686
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
687
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
688
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
689
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
690
- helm/benchmark/static_build/assets/index-b9779128.css,sha256=uXeRKCUzQAC32ofNoaK3-WC7kRWR--KnR6--1m9NdQA,491471
691
- helm/benchmark/static_build/assets/index-e439d5e1.js,sha256=t7AnJSBjGs43kxIev2uLVumaInyBUxad9KVtvA86oUw,124597
692
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
693
- helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
694
- helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
695
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
696
- helm/benchmark/static_build/assets/react-f82877fd.js,sha256=ijg4n6eANaZKXPWIVTQITqrtf-zzicjslJMm6DniDkA,275149
697
- helm/benchmark/static_build/assets/recharts-4037aff0.js,sha256=SP08CFvsw8cMMMMdqcXvsLviuOxkAhXGwvUIMvYUdxk,432466
698
- helm/benchmark/static_build/assets/tremor-38a10867.js,sha256=prOrg5S4EeKHSd6RkgnBIbVfXIUq3xjeVE0MRdqvenI,293019
699
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
700
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
701
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
689
+ helm/benchmark/static_build/index.html,sha256=BaMObuai-TufVapXx7P4wX8ZGvoQuyQh4bdD2ZDukoE,1185
690
+ helm/benchmark/static_build/assets/air-overview-DpBbyagA.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
691
+ helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png,sha256=_I8OI_2Fy_Vkmxl74qbSKtOb-C5mbHMye4JaC6LylDk,377331
692
+ helm/benchmark/static_build/assets/crfm-logo-Du4T1uWZ.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
693
+ helm/benchmark/static_build/assets/heim-logo-BJtQlEbV.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
694
+ helm/benchmark/static_build/assets/helm-logo-simple-DzOhNN41.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
695
+ helm/benchmark/static_build/assets/helm-safety-COfndXuS.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
696
+ helm/benchmark/static_build/assets/helmhero-D9TvmJsp.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
697
+ helm/benchmark/static_build/assets/index-oIeiQW2g.css,sha256=k1JZXkXPFsUerOZ37oDhxjcb1ypOFEdDogJUP6H-NAQ,491553
698
+ helm/benchmark/static_build/assets/index-qOFpOyHb.js,sha256=AwlcCxBOsMoY19-58uUNxyZC9llXwzDheC5ARsE98kM,129215
699
+ helm/benchmark/static_build/assets/medhelm-overview-CND0EIsy.png,sha256=6sKYQ79cN07-cUsnt-JPsdoVwUBWu5KxOaHWSdwjdgA,284408
700
+ helm/benchmark/static_build/assets/medhelm-v1-overview-Cu2tphBB.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
701
+ helm/benchmark/static_build/assets/overview-BwypNWnk.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
702
+ helm/benchmark/static_build/assets/process-flow-DWDJC733.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
703
+ helm/benchmark/static_build/assets/react-BteFIppM.js,sha256=rtvePuxI4R_ecUu6MekBI3bolSJCKhriCQYdFqg6HuI,275079
704
+ helm/benchmark/static_build/assets/recharts-DxuQtTOs.js,sha256=h1N20jF_qA400VP6AQSdA1GhlNsFEuXqQk5hMpInUjg,430871
705
+ helm/benchmark/static_build/assets/tremor-DR4fE7ko.js,sha256=xL2aEMpCmZYl2FDAA6G2MOjOIjfwp4v40hxilO10j1I,288436
706
+ helm/benchmark/static_build/assets/vhelm-aspects-NiDQofvP.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
707
+ helm/benchmark/static_build/assets/vhelm-framework-NxJE4fdA.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
708
+ helm/benchmark/static_build/assets/vhelm-model-ypCL5Yvq.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
702
709
  helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
703
710
  helm/benchmark/window_services/default_window_service.py,sha256=HlLI3be8s-GNxDygNGrvo9exEhbrO8Vtr3w0rnSIx7M,181
704
711
  helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=wfdydJY6AmpYCfAv5PQu9D6nFXbuxIRum7Tsv0DemJE,2148
@@ -731,89 +738,115 @@ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,s
731
738
  helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
732
739
  helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
733
740
  helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
734
- helm/clients/ai21_client.py,sha256=RAXQufajYnxr3b_1Hl-wAZkeE_j6O8zX-vngWEits6c,8158
741
+ helm/clients/ai21_client.py,sha256=wDhdlPvmRDqY4v5bLzL1TDu-HwZ9vuqEy2FbmH-jg7A,8239
735
742
  helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
736
- helm/clients/aleph_alpha_client.py,sha256=BK2eQIHYMxLMsZNWld85ZCj17JAoy5lU7rHuSBa4fOM,4981
737
- helm/clients/anthropic_client.py,sha256=R85gLYrheN2YWSGTnf3pkYTjCkTl300ktdlGLe1_1-o,36181
743
+ helm/clients/aleph_alpha_client.py,sha256=yqVBGDg5N_py0CB02GezK3lwJ2j2bFLm5qATO_d7R5M,5062
744
+ helm/clients/anthropic_client.py,sha256=XEHfZL8jc8ii8RC4ZTnxUdLi6c5gk9TYHMLOS4laJDI,36492
738
745
  helm/clients/auto_client.py,sha256=J5bCxIDZJUdV1dCv_EtbvwPzd1p2Ogtg207vpb3PhgI,11624
739
746
  helm/clients/azure_openai_client.py,sha256=mZ0udOAjadp7ZyE2KEtq8XuQp45eHlX_qM_getyzbA0,2009
740
- helm/clients/bedrock_client.py,sha256=sXxzNTs3pwVIwvir5lyJWLRajI9p2lMiJq21XsZ_FZo,12267
747
+ helm/clients/bedrock_client.py,sha256=CswF6TOq4M3We-ukDY4kjTUBiPUkdyczinZEYeZAdVI,12352
741
748
  helm/clients/bedrock_utils.py,sha256=8ZZfyOuZkgxL_naJ-wwBnH4GKv425fu3MfyakGHxeb4,3764
742
749
  helm/clients/client.py,sha256=fWJ_Eg4NyhPqlvpDvM7AjWN7cr2LU2uWdsnENLJXlTs,8963
743
750
  helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
744
- helm/clients/cohere_client.py,sha256=edQO5raoJYmYzfVREqHhNvjTcqPevG0M8EPMLOANqXY,10975
751
+ helm/clients/cohere_client.py,sha256=HQ8MeQKZVa-A3zZhpGK3sGzxJ8uE7EsJWCINzZus-kI,11082
745
752
  helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
746
753
  helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
747
- helm/clients/google_client.py,sha256=mIaUzK7GHCa9pqK1BEVhdt6dZsJfHv1Qdsf3I0Ayq8A,2912
754
+ helm/clients/google_client.py,sha256=8O-98kt7SLSZ-_bkPMTBdQPsxTb_UgfFrKV8tXidUuU,2993
748
755
  helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
749
756
  helm/clients/grok_client.py,sha256=SbVB6AduTwfElzUgEMnQW2kQUFVTCv4TpPPJvElQEe0,1127
750
- helm/clients/http_model_client.py,sha256=_F3_y2UWqbzESQdzV0FMEsECIKjporVSAW6iUQhJ35c,2818
751
- helm/clients/huggingface_client.py,sha256=oWR4yNFk28nrnB3IoznrhcEuU0pZkNywP0E82z1-NGM,17671
757
+ helm/clients/http_model_client.py,sha256=rDCzfugWoZegOSt-ZimKePYs4YA3SZ9CJ5Xuycjuccc,2899
758
+ helm/clients/huggingface_client.py,sha256=a4QV2h-voZ5MC0agrM4AibLdVUbyNWp1Pk4XUFMVc04,17709
752
759
  helm/clients/huggingface_pipeline_client.py,sha256=ivFTMNHBwwIUjkeOHkl-veZi5nNAjtnkYvneRFWs-6Q,6154
753
- helm/clients/ibm_client.py,sha256=4W4fbjnDNjXrP4gVwSfBHPus0QcqFOQzFvfaST1BE1Y,9701
760
+ helm/clients/ibm_client.py,sha256=9pQh0Ho5DN27bHYt6NrUHVdgD-iQyP8m_-OlraYWxok,9765
754
761
  helm/clients/lit_gpt_client.py,sha256=pgLfSvusNpdj8F5DVxzQdHxTDRNX4RVt6unegao803U,6229
755
762
  helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
756
- helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
757
- helm/clients/mistral_client.py,sha256=ceM8KLAcniAqK1BNVdUGzqy4av2SEEau6PVmPivxc0o,8369
758
- helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
763
+ helm/clients/megatron_client.py,sha256=Zk80yeDFWSFDy2ILtuOC5hs9ruH-AUDhxZiMWw_IJi8,4188
764
+ helm/clients/mistral_client.py,sha256=Nlh97asTsMSHo7-m1JgbYdqnUjSeQ83spaNeHVNkvzg,8454
765
+ helm/clients/moderation_api_client.py,sha256=1lB875B5F2I32u8j-Q8DoA9CQA5-kMJ6RnPfkOqS6AQ,4800
759
766
  helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
760
767
  helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
761
- helm/clients/openai_client.py,sha256=prSASL8IE3XO0_CaAuZ45iYSITAMQIwtD2q7UqM4qjA,27803
762
- helm/clients/openai_responses_client.py,sha256=aixsZwO_swP4dhOhJPe1ZcOIav3rxmovPGY0Ug6s5ZA,7308
763
- helm/clients/palmyra_client.py,sha256=4AaZcV2tPHU4HJ9FWSkOY8_C9ndEckH3PH715QxJQ8E,7086
768
+ helm/clients/openai_client.py,sha256=f_RY84FPNt04vfR7py4iXDr9i6cB7824v9PYwTna-Q0,28650
769
+ helm/clients/openai_responses_client.py,sha256=FhQcOcXNZc5AuDMh1KBD3ZoRdEREy73dIeFBjUg9YDo,8444
770
+ helm/clients/openrouter_client.py,sha256=oK8gXBhBs1y0AriZ9tVp8kx5lSY7gUgQJv-mfywSTfI,980
771
+ helm/clients/palmyra_client.py,sha256=_dUeVY-64C94aJdbgzpGWy6b2AbmIxCG4ZqTSgRRLcg,7128
764
772
  helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
765
- helm/clients/reka_client.py,sha256=hA0tq3Hc9669q2sYa4Jr5yWy2NAbvoFDnVqQ6vds62w,8334
773
+ helm/clients/reka_client.py,sha256=6FNiH7b8ADO8NHS7759rDeIGGbgVFagpDZ7_u_rYgaA,8376
766
774
  helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
767
- helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=NGbeI6sMenmgqPQTWxYF3C1Aen29LybRcHcsmS3Jqmg,2059
775
+ helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=EhgDbDoDNwTow65jea4dJNqnBn5CjYUl_N1MueeB33g,2057
768
776
  helm/clients/stanfordhealthcare_claude_client.py,sha256=ShhbLttPDRa-Pnvr35_2WmVx5s0XpsJMGzu5qhzLoLI,1020
769
777
  helm/clients/stanfordhealthcare_google_client.py,sha256=cJK_uH-YBQpBJsltNuiUi0x77bh0eCM5UNBaJQ1zai4,1475
770
- helm/clients/stanfordhealthcare_http_model_client.py,sha256=LEq1fIxHnTnwCsvkF1AUlw6L4Gwv4egx0-PUvNiMNTo,3047
778
+ helm/clients/stanfordhealthcare_http_model_client.py,sha256=2ppahR35twHqxDMb7Dzy2rfjoFVuHZTB05MZJeKOy_8,3128
771
779
  helm/clients/stanfordhealthcare_openai_client.py,sha256=Qyl8voGz1hJPqT6g4PunMuN99EYaW8U-NXQQSgJbiiM,2169
772
780
  helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy6ifJ8pUiNa3vBcWiDsIwXFI,1343
773
781
  helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
774
782
  helm/clients/test_client.py,sha256=T27UsIPWsbE1JK_8DN_DW9LkEcIGRbgDjio14YOIAb0,3854
775
783
  helm/clients/test_huggingface_client.py,sha256=8Shzrf1Pad1UsiUAdeOSqsTPQaay0CrWXmdNeIfrJ2Y,3418
784
+ helm/clients/test_openrouter_client.py,sha256=gCzchJMQZi4kkgtpGe1Ma0xF2nsP1uDevJcqbprZ6RE,2414
776
785
  helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
777
786
  helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
778
- helm/clients/together_client.py,sha256=ByImeitpWRhXpZ9U6c0Kol1D8X7Fxno5xgo6D7sZYOY,24201
787
+ helm/clients/together_client.py,sha256=Nj1FY1nMN5pYiHOG6lKPinVwgqBCTBT-9kHdgq953KU,25667
779
788
  helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
780
789
  helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
781
- helm/clients/vertexai_client.py,sha256=AxZRpZTRrzxwPs2xwKTgHH0eh7WEmHSS1ArTZwI_q3E,23268
790
+ helm/clients/vertexai_client.py,sha256=Hf8ncfCrpoG8ZLl_TRTX5vdxjf0kyzwIaFR8V-qfSEc,23768
782
791
  helm/clients/vllm_client.py,sha256=xmXf35WX2oOZhpQnRxeooXGshENySOHZCUQ1E4pbQbA,2647
783
792
  helm/clients/vllm_granite_thinking_client.py,sha256=fds2i8LUG78OJYke1uYdDy6XRFqE3rZgSornFjzu4Sk,2172
784
- helm/clients/writer_client.py,sha256=flKLeMbFkyGfNmv1ozZGU4dxNy-QF5bFJF0mGHqpU3c,4467
793
+ helm/clients/writer_client.py,sha256=Eyae245YkOcW1yHtVMqLPRRvzddr9IcXZLstr8UO5iw,4552
785
794
  helm/clients/yi_client.py,sha256=nC60d2HiUL2W59FTne9tWmZ9bGGY1OvI7Ob3Ng4wSPE,750
786
795
  helm/clients/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
787
- helm/clients/audio_language/diva_llama_client.py,sha256=Bvcf4wE7yMZlqETgKEMtCug8-2fQI8QCDdaGWSeQ2X8,4864
788
- helm/clients/audio_language/llama_omni_client.py,sha256=OCak716q97uEk9CBXQqnmUsbLFR-dddMzg5eyIZ4gzE,8718
789
- helm/clients/audio_language/qwen2_5_omni_client.py,sha256=ftAVtOG0azvRQEcFjkSSBMU6SDk9Bi8WIks6o6UCbKQ,9684
790
- helm/clients/audio_language/qwen2_audiolm_client.py,sha256=s9eH8fnVgw5xV39b_8AGt6IyNN3q9Uhcx6HZVxt7TM8,8981
791
- helm/clients/audio_language/qwen_audiolm_client.py,sha256=RvYweXANEyzhHYDx38H10F0ZEFaL8kj7n7TZ-UrRmZs,6338
796
+ helm/clients/audio_language/diva_llama_client.py,sha256=NoBeG74AqDLxJXZuyBtdEwXmAprj3cbTjon-_-_C4oY,4945
797
+ helm/clients/audio_language/llama_omni_client.py,sha256=JIOxGd1iCyCP8LPqA5KupwSsXKXb64GYmllZ5UtKgJc,8774
798
+ helm/clients/audio_language/qwen2_5_omni_client.py,sha256=3rWuCKfrvwpRzQoDxWJMhThS0cX1hCg9fCWiyVstXSk,9740
799
+ helm/clients/audio_language/qwen2_audiolm_client.py,sha256=AZKLQY6ipfmpPm2Kg-Ecmm0Nut_Ni6syx1P57XCmYLw,9037
800
+ helm/clients/audio_language/qwen_audiolm_client.py,sha256=j2OQaLnHU5v4IaVZrpzV7D5JFzXYXYHxcLvO1rnO23E,6394
792
801
  helm/clients/audio_language/test.py,sha256=FrKpirOwJW1__E2egq4VPgsTrgiSHZHBwfUCvxNjC0o,1969
802
+ helm/clients/audio_language/llama_omni/arguments.py,sha256=MxzZKE8sNsOe5eUse96gejOsmu_MfTJGiuOwR87xiSA,2334
803
+ helm/clients/audio_language/llama_omni/constants.py,sha256=IjFS9EUI5p1DLtGcX0B1lSxESkxcx5dMbuMkMm1UaSs,183
804
+ helm/clients/audio_language/llama_omni/conversation.py,sha256=SgoMEf1Roi_8ZxiIM6DXwY3ozw0ExOCYdFFX-5rRA0g,6881
805
+ helm/clients/audio_language/llama_omni/preprocess.py,sha256=2-YA6czgO1Zr-C1ChHvqVEfYa8qHhHp6n1Lb1Uw67qg,10764
806
+ helm/clients/audio_language/llama_omni/utils.py,sha256=GycpuTkNEZtMNG2ZTZ7cYVjPEilyC4o2itT9K9kwJFI,7556
807
+ helm/clients/audio_language/llama_omni/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
808
+ helm/clients/audio_language/llama_omni/model/builder.py,sha256=-y7amgUyPMEMknVutSSb_W3Zsm09r3K7u08jgEMinYA,3875
809
+ helm/clients/audio_language/llama_omni/model/omni_speech_arch.py,sha256=-Sgo9fEGHRBfkZrR63i3-uXZ19wkqYbGLqAiDqevRr0,11465
810
+ helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py,sha256=CqtEURdHlk6_29iM8WZnsmd7DMrUcnULGD2U2inWIxw,8426
811
+ helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py,sha256=ZmtQY7JT74O4OH78UYSuBnmxq5Hi4-86kEY5-svfU-M,4564
812
+ helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py,sha256=TwSVGfSOA5N82pB2_P4cElN7w_4w2XHBXr9qicluM2w,389
813
+ helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py,sha256=LF8znt1puoExQ87ovtoyc1-pzO9kWNqTu_CvUWr3nBE,965
814
+ helm/clients/audio_language/llama_omni/model/speech_generator/builder.py,sha256=nIjOSYgJTrdnqDvy5jnYjMcHvpOirAyvMNLuUbnL9pY,358
815
+ helm/clients/audio_language/llama_omni/model/speech_generator/generation.py,sha256=Rka7iVephHHj0z0mPPQLfe-3Tt_UsWbTI7VRevs1ek4,30644
816
+ helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py,sha256=mllXYemRl-laMRntRsKtak8SIWEbVfWk0EpxPqs-su0,4612
817
+ helm/clients/audio_language/llama_omni/model/speech_projector/builder.py,sha256=rmzWg4yZIfGpYD7VhfSrRNN7t5U4xNq8TVugq0KLYWc,372
818
+ helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py,sha256=naunMdDZXzK8VpASZJYsY6TwvuxQn3Uw9r_MUouUG5k,950
819
+ helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py,sha256=oIaVRv1KlFYPqbT1nPtATgTcVomfNvtHmxnIZ2wcTC4,19088
820
+ helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py,sha256=s08H7EY_TzHqVk1b6DZv_gI4VVwP_ub_FwF6JJu0z-c,180552
821
+ helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py,sha256=n8by91xA1xTYz8BfsbYAwCL5G0x1FuLhSGDAP33Qyyw,12216
822
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
823
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py,sha256=ZZ5I9X_p1-ttDbYsLBxImO_CxbC5LESLI8AAIe9kKv0,365
824
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py,sha256=VKATc5W9kl0fo9TuU19MaXYSObGxX2V2Fo1NlD4GC4I,2516
825
+ helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py,sha256=TFvQvPiP0X8Zt-agQR84o75LUZp0uXDZAUqUl0vhPM8,14635
793
826
  helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
794
827
  helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
795
828
  helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
796
829
  helm/clients/clip_scorers/multilingual_clip_scorer.py,sha256=LgV1hN6y2FiFQ30UakxRmlwtLs_LCMxrOCewriN1nkk,2066
797
830
  helm/clients/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
798
- helm/clients/image_generation/adobe_vision_client.py,sha256=eFEeuyyQlas9Oz-w7cdQ_mRIwbPJFECwmAu5xkvt3wA,2892
799
- helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=6HXLS_kxJouNyLVLfhoepMf5CNIQukWzxkVNV71PJ6Y,4028
800
- helm/clients/image_generation/cogview2_client.py,sha256=ssIrM6-7bbBQq_w6SCE4DtPWnF_S2x_FqMhSjomlBK4,8455
801
- helm/clients/image_generation/dalle2_client.py,sha256=amsgWBzP266UmOYM6QFIxEV4xAybw_OpPVdtH748REk,8430
831
+ helm/clients/image_generation/adobe_vision_client.py,sha256=x8nOWO4oQLy8vp_iiZN0dAKQz2QxEfcDKFgSETH9hHQ,2973
832
+ helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=91--D-nax3rzcfBYdinMxtH0xa0uwWZs_4jH_HgOet8,4109
833
+ helm/clients/image_generation/cogview2_client.py,sha256=hRNkJjw9DbqEioiA1PKtg5-GX5zqidSabw-M5lvr57U,8493
834
+ helm/clients/image_generation/dalle2_client.py,sha256=58JACUUO0d9EETqyM7k2eA-YsnmNFhhqtVrAlExrNq4,8515
802
835
  helm/clients/image_generation/dalle3_client.py,sha256=sabS7AJ6O5ewmTkGmHr4cK14tlMcmh-xrGgj7J-xa3k,4639
803
- helm/clients/image_generation/dalle_mini_client.py,sha256=wTXW79KJ5tCku46AiotA2x0vQAqKi6g4aX05n-Qm_pQ,8175
804
- helm/clients/image_generation/deep_floyd_client.py,sha256=P566sdeBvLLv6vi9Y4IbtEfmoQykMDh0viUeRXY7Ik0,3028
805
- helm/clients/image_generation/huggingface_diffusers_client.py,sha256=OgkWWNGquu0xTKVSAduAMz8T_pWG-SwwG9iZegPrXEw,12313
836
+ helm/clients/image_generation/dalle_mini_client.py,sha256=axO4mmBZQ22juEwqYFdiFBtH6cbqweXbwmLKy5d-03Y,8213
837
+ helm/clients/image_generation/deep_floyd_client.py,sha256=scEifSsu2fRD08rHzHhSBjHRbaYnKDSC_Z8I2VQXO3E,3109
838
+ helm/clients/image_generation/huggingface_diffusers_client.py,sha256=atj0YBQYHgrTzCkHFZVhNC2SXdgvWEc0Yg-62475xQo,12352
806
839
  helm/clients/image_generation/image_generation_client_utils.py,sha256=N130PbHLLvE9Q1iVefPvTCJzs3hG3osZCeYdJyjLjCw,437
807
- helm/clients/image_generation/lexica_client.py,sha256=zyyfxZdTiBopPLZJ-uu5ewQYf9tUhtrEIHZddq8gCBI,3681
808
- helm/clients/image_generation/mindalle_client.py,sha256=IH9XM6n-ZHXyidTG66ew-pAgb9XaXLu-_AD_-YBydU8,4653
840
+ helm/clients/image_generation/lexica_client.py,sha256=7uM9Zq5JXbsjriJyYnVA_S6_3xCKKyGw-lMZAKtfENo,3762
841
+ helm/clients/image_generation/mindalle_client.py,sha256=6YWzCjyV5ELRvmIiq-WjHO-rVdOulcC9PH7ughy-H8s,4692
809
842
  helm/clients/image_generation/nudity_check_client.py,sha256=TeFga6HvBKgdX7LitBoioXUD4BQGavVwzr5BFFE29x8,2599
810
- helm/clients/image_generation/together_image_generation_client.py,sha256=onvBeRFuuuzpAVg5lZAbnUzmv5L5HwsPwRcflDPqWUI,4393
843
+ helm/clients/image_generation/together_image_generation_client.py,sha256=MkVwuK9iTGjMjtnqv3RFJpIm9_RHXb6Ys4te_WRfkO0,4474
811
844
  helm/clients/image_generation/cogview2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
812
845
  helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrHPtGDiCRq05kEh-KHjSTgxPDw6R8,3766
813
846
  helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
814
847
  helm/clients/image_generation/cogview2/sr_pipeline/__init__.py,sha256=qWuNwKlcvGwEFcw5932wk_t0_baNwUILIJzQWJjgh2A,488
815
848
  helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py,sha256=1DwcUw9Tb563JpKpkPNIB5Ew1djozvPiGASShffiABk,3716
816
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=xYn3acxU4BRdDeRjk98Vj0qq8qqty93kPCLdz-bOMKs,10818
849
+ helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=IUTvHpIaaYrH00CQZZX9L45JMRb-twYir99K7LLnOzQ,10819
817
850
  helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py,sha256=OonYjdtNKJo12cNb-t-gFHLXRFxItCXjKgS9YxWAI-k,7718
818
851
  helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py,sha256=LSvAHRupsOqk3yb4GxyTsubRxrnPOEfObFym2j4eiKc,5120
819
852
  helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py,sha256=5D1QWyAcY0CpwITk7EBN6ylUtc7mvZaE9iHG628AqMQ,10390
@@ -837,7 +870,7 @@ helm/clients/image_generation/mindalle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
837
870
  helm/clients/image_generation/mindalle/models/__init__.py,sha256=1UieFJ0LGinYSB-idy3atl-gFAmS_ouiiGX6TM2Mh-I,8372
838
871
  helm/clients/image_generation/mindalle/models/tokenizer.py,sha256=NFFdLUhoxEkv9SZqU3QIFk0ukaCcn6w_xFWQIRGhZJ4,1190
839
872
  helm/clients/image_generation/mindalle/models/stage1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
840
- helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=cg5c6KShCMbMmHFuzDBIG-WgIBBkDrG9XvXC1DxqowA,11044
873
+ helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=Q-yZeB8ZIxwOdQaKpEeBVbwF9nXeQJ2xJhiD6KjqRi4,11046
841
874
  helm/clients/image_generation/mindalle/models/stage1/vqgan.py,sha256=KcarvKoMuPBpP0H8F8W67FogdvHaAQuo9jP3rFRxc5E,4035
842
875
  helm/clients/image_generation/mindalle/models/stage2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
843
876
  helm/clients/image_generation/mindalle/models/stage2/layers.py,sha256=LvDADun5nMaencaRT0pm-dq78xHpPPkpi8rlu7RLHco,5306
@@ -847,14 +880,14 @@ helm/clients/image_generation/mindalle/utils/config.py,sha256=lh8dXvL7ctKmuYEbeT
847
880
  helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh9HMveJs6F49UMK57Xfa0ccnHqI8,5029
848
881
  helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
849
882
  helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
850
- helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=S4FDbSO917bUw3cK64xnxwH5HFH_Eb-w2zQ8ZL4eSSk,6588
851
- helm/clients/vision_language/huggingface_vlm_client.py,sha256=OHV41AA-WZo_CnsHymwslgjDcVK0uHmIrvGbrxBDK5w,5000
852
- helm/clients/vision_language/idefics_client.py,sha256=DURync-8rh2ccdlGDPl3NMgryBcMn5yCrrmFZisf5m0,7784
853
- helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
854
- helm/clients/vision_language/paligemma_client.py,sha256=K9MzXlgjXoiVafA8bbu-mKNt3Z9kq8v8AJL286DyQqI,6867
855
- helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
856
- helm/clients/vision_language/qwen2_vlm_client.py,sha256=jvh_-jyvFL4r3LPX-gWPCYHT503JtJ73FVHQS2KyQ2c,8325
857
- helm/clients/vision_language/qwen_vlm_client.py,sha256=wNxEuYOrhjaW5s4vtdRxKvJ-LCTTGyKqiqD84j7H1Do,7565
883
+ helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=dBf-tQJSwjHjZ3-eOaf2xfpltMRSWfczNLh7_OOPwVw,6640
884
+ helm/clients/vision_language/huggingface_vlm_client.py,sha256=3qQ4Ks2M-CWWn3h-Kl_xIO8Dyd_2Bc9wvvNkufyfMsA,5081
885
+ helm/clients/vision_language/idefics_client.py,sha256=7TNV7JTXeD6A5SZf9CtM7ugnd910B0-fFYhKxu2dA0Q,7836
886
+ helm/clients/vision_language/open_flamingo_client.py,sha256=w-bUzcSlwn_t_pX16HRFM9Vb3GhY3MhEPVShQKd0dKw,6558
887
+ helm/clients/vision_language/paligemma_client.py,sha256=9SW_QYJm8PDmQpT4iDkbIohbMv0f2-QacJpBRpDknN4,6919
888
+ helm/clients/vision_language/palmyra_vision_client.py,sha256=oUKfD_gJnWYs0l7u6axIt5xIoHwRkEMRK4agq50_JGc,4156
889
+ helm/clients/vision_language/qwen2_vlm_client.py,sha256=t_u36ZjZlORnCxaWsx8q5T2eAzKBW2VTRZAIYTHOf6s,8381
890
+ helm/clients/vision_language/qwen_vlm_client.py,sha256=VVhPVI0Xc2BuHQdWWc89jJTVXyscn62DxyzHpKmbmvs,7621
858
891
  helm/clients/vision_language/open_flamingo/__init__.py,sha256=RTxnxjYnTmTZv-608o66_W74qmKLpEO6hx0cxaZaYv8,172
859
892
  helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
860
893
  helm/clients/vision_language/open_flamingo/src/factory.py,sha256=4KRXLV5mOEZ34-Foq2zVgTye3sQD-Buz6NZTSp2X9_A,5790
@@ -876,7 +909,7 @@ helm/common/critique_request.py,sha256=DZhJ_sY2IMluOxz-FeHvuEkA2Ujsx65HXT__7T3Ux
876
909
  helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
877
910
  helm/common/general.py,sha256=TcdPXn_bgPFvXtFP2lJhncz4Q8SdTXnKOinHOTBsegw,12027
878
911
  helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
879
- helm/common/hierarchical_logger.py,sha256=qIbhwh-dlCcnYG10qTSMxIMM7_Q9VJj8ymDqnWlseuo,6151
912
+ helm/common/hierarchical_logger.py,sha256=iGVHqCSOlVijjPNvzQDHOdxP8-2ll2PGA2Y5n-u4_sQ,6827
880
913
  helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
881
914
  helm/common/images_utils.py,sha256=8BsN0fd8pc0rh_TSDvippWhTfwmJJXKNF2zqKLB8cps,3372
882
915
  helm/common/key_value_store.py,sha256=D9ZBORzZncf3zHQOP4AuNbQnV8cZpO_kqHY1mDRugqQ,3174
@@ -887,7 +920,7 @@ helm/common/mongo_key_value_store.py,sha256=G0TIWQcvwMjyXh4TnN6xJ462HKHUAZtQJJYQ
887
920
  helm/common/multimodal_request_utils.py,sha256=n6HgTyHNqfGmU9qmVK-wxQzrkPZ5Wdh-lO_y_ln6VYc,2184
888
921
  helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
889
922
  helm/common/object_spec.py,sha256=sKcEdggqRa3a8TovHAS4lf1LaahOFInvMl5DUF4tE6c,5186
890
- helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
923
+ helm/common/optional_dependencies.py,sha256=mM5qeuTq6-BiNJPjAsq29olq8_5TOVF-FIK0EeM25Po,618
891
924
  helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
892
925
  helm/common/reeval_parameters.py,sha256=exaEucXnSI8a076uq_qhO3CTBztMMRoRzL_7v1N4adE,300
893
926
  helm/common/remote_context.py,sha256=DzFMii9AN03CoWp1J3k703-7oQJYHwEf9TDV5YzM6v4,2825
@@ -895,7 +928,7 @@ helm/common/request.py,sha256=HWj6IizIwJm9_NigO-geira_rI6aqhj5CevQB694m94,9161
895
928
  helm/common/response_format.py,sha256=wIptA8FydZoRjMvO5SFIplgDXhwpZvZmFI-Bi-7mcGU,516
896
929
  helm/common/test_cache.py,sha256=j19p-qzv_98X_TMW4b39ZHwSJ-MX3p91PrkYumarS6Y,4870
897
930
  helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
898
- helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
931
+ helm/common/test_general.py,sha256=ZPuRRkMG0gA95GOVxfd4xvtSV-1T09rBj3Huwi72-Ks,1909
899
932
  helm/common/test_logging.py,sha256=tkb_QDPkKBfaEQ5Y8Xip9PgMYhqOFakcENqyzO5Mj2o,2681
900
933
  helm/common/test_media_object.py,sha256=SUWLfms_vkXNivRYM0ZT8AI3_2ru6GON5l-Hb-lk-t0,1661
901
934
  helm/common/tokenization_request.py,sha256=NND9ESiiDE0H8QRNpfHVjXS7MQfKKIwtVRKDIjPnnJM,3344
@@ -904,16 +937,16 @@ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFq
904
937
  helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
905
938
  helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
906
939
  helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
907
- helm/config/model_deployments.yaml,sha256=sB3cV6io0NzUQXuKlA49-H3UzOEvWpFDP_MZ30gH0I0,171682
908
- helm/config/model_metadata.yaml,sha256=0Ps6WlsgElxOpCHVGiWu7QfS0o3Ls4zi1iuwC8PTUgE,269972
909
- helm/config/tokenizer_configs.yaml,sha256=Vq6MY2nplhYgiyLR98xCXBJWQgEpm64yenrskmkm2NI,40415
940
+ helm/config/model_deployments.yaml,sha256=DNY-6M7CVA0SSvS4nMNpK7zLAPhHDDR4NHimRrFrQjA,182596
941
+ helm/config/model_metadata.yaml,sha256=2IGQlmx6GWHveVjfSsSAn8WMdDLji5OD7d7BK4cDG7o,291347
942
+ helm/config/tokenizer_configs.yaml,sha256=TUuOUkORUVnpiHsPFO9BPcKTM3WPCaFcN8IzIBFsvkg,43555
910
943
  helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
911
944
  helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
912
945
  helm/proxy/cli.py,sha256=kEDoHpisFO0EJ0Wfm1FLpJdP9sXk9j8WCILEq42RKb0,8317
913
- helm/proxy/example_queries.py,sha256=EB2vVpAryOUAFiLrwsMiFz0zGl_UAQ8TJ9SkWngvsu4,4389
946
+ helm/proxy/example_queries.py,sha256=A4JKvLwkHQIprsgMFhGvruW1-Ud4YKNqwUWhv6iWfzw,4449
914
947
  helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
915
948
  helm/proxy/retry.py,sha256=o64BZsW2vwu2iewRA18wdsru2xC3eNBQ7WUw3IjC_5g,3698
916
- helm/proxy/server.py,sha256=Q4Mzts8mketktGVJ5AoOEA-_SGCue5QeOlK8dqPUuHI,10853
949
+ helm/proxy/server.py,sha256=PYG8oMb-lq8eGR3Kad2ZTudJxgY4QH4jVbyoOgjes7I,10904
917
950
  helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
918
951
  helm/proxy/test_retry.py,sha256=db0owyGTThmIMhYWU_Eh1U-AJvQ-Wa9j_kRmC9DNjOA,1059
919
952
  helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -933,9 +966,9 @@ helm/proxy/services/test_remote_service.py,sha256=xzkyptctXw3y5d1fgbidBMyw8B4rIL
933
966
  helm/proxy/services/test_service.py,sha256=oDYen-71iwZ6YMNBVbVSdEFsH6GMvZYw5tS5Eg4YHjY,8987
934
967
  helm/proxy/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
935
968
  helm/proxy/static/help.html,sha256=2Rn_lGZspqrZhNfLQ4wIAvYO_BK9q67Q_AS2-3WsMpY,6231
936
- helm/proxy/static/index.css,sha256=1OBOJ87LhwI2PtpoIyZoGQbSxQK2dz2vxk8BVmAybWY,717
969
+ helm/proxy/static/index.css,sha256=3z_JuWVuJFngWtHI4T5-EVyk4LyaCPDcSzlalvUYhmQ,754
937
970
  helm/proxy/static/index.html,sha256=nUJf_hwBPokqrm_hDZsVfHcJrnhZLYhkVSoLdGOocf8,2009
938
- helm/proxy/static/index.js,sha256=-OXgf2rUYI49vg4KhwdL2VygKgzAGoYHKngaWgMb4E0,14996
971
+ helm/proxy/static/index.js,sha256=bCjx29j88UnfoeYL4jRYGaqg7fd6o8IePZ0sTl-HRy8,15292
939
972
  helm/proxy/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
940
973
  helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
941
974
  helm/proxy/token_counters/auto_token_counter.py,sha256=Ag368Sb-eLQUMLW7lmWc2EOKN3kgkiCTsYnHNrsf9kw,2071
@@ -967,8 +1000,8 @@ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
967
1000
  helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
968
1001
  helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
969
1002
  helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
970
- crfm_helm-0.5.7.dist-info/METADATA,sha256=TMyCY6K4C2Z3wO2Jh5XVDq-hHQ1xxCArIm31BUeGbgM,23548
971
- crfm_helm-0.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
972
- crfm_helm-0.5.7.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
973
- crfm_helm-0.5.7.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
974
- crfm_helm-0.5.7.dist-info/RECORD,,
1003
+ crfm_helm-0.5.9.dist-info/METADATA,sha256=LuiU5r_-9KfusWA04IwPffmA5KfGYXwZvRR-noA20Ns,18617
1004
+ crfm_helm-0.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1005
+ crfm_helm-0.5.9.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
1006
+ crfm_helm-0.5.9.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
1007
+ crfm_helm-0.5.9.dist-info/RECORD,,