crfm-helm 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (606)
  1. crfm_helm-0.5.5.dist-info/METADATA +413 -0
  2. crfm_helm-0.5.5.dist-info/RECORD +894 -0
  3. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  22. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  23. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  24. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  25. helm/benchmark/adaptation/prompt.py +1 -1
  26. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  27. helm/benchmark/annotation/air_bench_annotator.py +20 -5
  28. helm/benchmark/annotation/annotator.py +5 -0
  29. helm/benchmark/annotation/annotator_factory.py +3 -20
  30. helm/benchmark/annotation/anthropic_red_team_annotator.py +11 -24
  31. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  32. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  33. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  34. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  35. helm/benchmark/annotation/call_center_annotator.py +22 -11
  36. helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
  37. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  38. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  39. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  40. helm/benchmark/annotation/harm_bench_annotator.py +11 -24
  41. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  42. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  43. helm/benchmark/annotation/live_qa_annotator.py +10 -5
  44. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  45. helm/benchmark/annotation/medalign_annotator.py +100 -0
  46. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  47. helm/benchmark/annotation/medication_qa_annotator.py +90 -61
  48. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  49. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  50. helm/benchmark/annotation/model_as_judge.py +281 -18
  51. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  52. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  53. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  54. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  55. helm/benchmark/annotation/omni_math_annotator.py +132 -0
  56. helm/benchmark/annotation/simple_safety_tests_annotator.py +11 -25
  57. helm/benchmark/annotation/spider_annotator.py +18 -0
  58. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  59. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  60. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  61. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  62. helm/benchmark/annotation/xstest_annotator.py +20 -30
  63. helm/benchmark/annotation_executor.py +35 -15
  64. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  65. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  66. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  67. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  68. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  69. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  70. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  71. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  72. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  73. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  74. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  75. helm/benchmark/augmentations/perturbation.py +1 -1
  76. helm/benchmark/augmentations/space_perturbation.py +2 -2
  77. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  78. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  79. helm/benchmark/augmentations/test_perturbation.py +16 -13
  80. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  81. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  82. helm/benchmark/data_preprocessor.py +2 -2
  83. helm/benchmark/huggingface_registration.py +2 -7
  84. helm/benchmark/metrics/aci_bench_metrics.py +34 -0
  85. helm/benchmark/metrics/basic_metrics.py +6 -6
  86. helm/benchmark/metrics/bbq_metrics.py +2 -2
  87. helm/benchmark/metrics/bias_metrics.py +12 -3
  88. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  89. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  90. helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
  91. helm/benchmark/metrics/classification_metrics.py +76 -12
  92. helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
  93. helm/benchmark/metrics/code_metrics.py +5 -5
  94. helm/benchmark/metrics/comet_metric.py +125 -0
  95. helm/benchmark/metrics/common_metric_specs.py +9 -2
  96. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  97. helm/benchmark/metrics/copyright_metrics.py +4 -4
  98. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  99. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  100. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  101. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  102. helm/benchmark/metrics/dischargeme_metrics.py +34 -0
  103. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  104. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  105. helm/benchmark/metrics/efficiency_metrics.py +3 -3
  106. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  107. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  108. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  109. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  110. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  111. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  112. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  113. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  114. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  115. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  116. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  117. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  118. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  120. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  121. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  122. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  123. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  124. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  125. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  126. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  127. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  128. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  129. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  130. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  131. helm/benchmark/metrics/med_dialog_metrics.py +34 -0
  132. helm/benchmark/metrics/medalign_metrics.py +34 -0
  133. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  134. helm/benchmark/metrics/medec_metrics.py +101 -0
  135. helm/benchmark/metrics/medi_qa_metrics.py +34 -0
  136. helm/benchmark/metrics/medication_qa_metrics.py +15 -4
  137. helm/benchmark/metrics/mental_health_metrics.py +34 -0
  138. helm/benchmark/metrics/metric.py +3 -3
  139. helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
  140. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  141. helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
  142. helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
  143. helm/benchmark/metrics/nltk_helper.py +32 -0
  144. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  145. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  146. helm/benchmark/metrics/output_processing_metric.py +60 -0
  147. helm/benchmark/metrics/output_processors.py +15 -0
  148. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  149. helm/benchmark/metrics/ranking_metrics.py +3 -3
  150. helm/benchmark/metrics/reference_metric.py +3 -3
  151. helm/benchmark/metrics/safety_metrics.py +39 -17
  152. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  153. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  154. helm/benchmark/metrics/spider_metrics.py +7 -0
  155. helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
  156. helm/benchmark/metrics/statistic.py +1 -1
  157. helm/benchmark/metrics/summac/model_summac.py +1 -1
  158. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  159. helm/benchmark/metrics/summarization_metrics.py +19 -9
  160. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  161. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  162. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  163. helm/benchmark/metrics/test_metric.py +1 -1
  164. helm/benchmark/metrics/test_statistic.py +2 -2
  165. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  166. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  167. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  168. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  169. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  170. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  171. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  172. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
  173. helm/benchmark/metrics/toxicity_metrics.py +4 -4
  174. helm/benchmark/metrics/unitxt_metrics.py +21 -4
  175. helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
  176. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  177. helm/benchmark/model_metadata_registry.py +16 -0
  178. helm/benchmark/presentation/create_plots.py +1 -1
  179. helm/benchmark/presentation/schema.py +3 -0
  180. helm/benchmark/presentation/summarize.py +119 -256
  181. helm/benchmark/presentation/test_summarize.py +145 -3
  182. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  183. helm/benchmark/reeval_run.py +203 -0
  184. helm/benchmark/reeval_runner.py +355 -0
  185. helm/benchmark/run.py +8 -17
  186. helm/benchmark/run_expander.py +105 -8
  187. helm/benchmark/run_spec_factory.py +12 -0
  188. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  189. helm/benchmark/run_specs/audio_run_specs.py +613 -0
  190. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  191. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  192. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  193. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  194. helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
  195. helm/benchmark/run_specs/experimental_run_specs.py +112 -3
  196. helm/benchmark/run_specs/finance_run_specs.py +6 -2
  197. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  198. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  199. helm/benchmark/run_specs/long_context_run_specs.py +89 -0
  200. helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
  201. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  202. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  203. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  204. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +66 -52
  205. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  206. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  207. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  208. helm/benchmark/run_specs/vlm_run_specs.py +83 -5
  209. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  210. helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
  211. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  212. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  213. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  214. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  215. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
  216. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  217. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  218. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  219. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  220. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  221. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  222. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  223. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  224. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  225. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  226. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  227. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  228. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  229. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  230. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  231. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  232. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  233. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  234. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  235. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
  236. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  237. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
  238. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  239. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  240. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  241. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  242. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  243. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  244. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  245. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  246. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  247. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  248. helm/benchmark/scenarios/bold_scenario.py +1 -1
  249. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  250. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  251. helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
  252. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  253. helm/benchmark/scenarios/clear_scenario.py +153 -0
  254. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  255. helm/benchmark/scenarios/code_scenario.py +17 -4
  256. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  257. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  258. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  259. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  260. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  261. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  262. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  263. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  264. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  265. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  266. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  267. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  268. helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
  269. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  270. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  271. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  272. helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
  273. helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
  274. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  275. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  276. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  277. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  278. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  279. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  280. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  281. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  282. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  283. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  284. helm/benchmark/scenarios/headqa_scenario.py +131 -0
  285. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  286. helm/benchmark/scenarios/ice_scenario.py +8 -4
  287. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  288. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  289. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  290. helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
  291. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  292. helm/benchmark/scenarios/koala_scenario.py +1 -1
  293. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  294. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  295. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  296. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  297. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  298. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  299. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  300. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  301. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  302. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  303. helm/benchmark/scenarios/math_scenario.py +9 -1
  304. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  305. helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
  306. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  307. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  308. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  309. helm/benchmark/scenarios/medalign_scenario.py +88 -0
  310. helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
  311. helm/benchmark/scenarios/medbullets_scenario.py +140 -0
  312. helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
  313. helm/benchmark/scenarios/medec_scenario.py +120 -0
  314. helm/benchmark/scenarios/medhallu_scenario.py +66 -0
  315. helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
  316. helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
  317. helm/benchmark/scenarios/mental_health_scenario.py +112 -0
  318. helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
  319. helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
  320. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
  321. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  322. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  323. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  324. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  325. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
  326. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
  327. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
  328. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  329. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  330. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  331. helm/benchmark/scenarios/numeracy_scenario.py +10 -1
  332. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  333. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  334. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  335. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  336. helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
  337. helm/benchmark/scenarios/quac_scenario.py +10 -1
  338. helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
  339. helm/benchmark/scenarios/raft_scenario.py +18 -3
  340. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  341. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  342. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  343. helm/benchmark/scenarios/scenario.py +9 -1
  344. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +233 -84
  345. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  346. helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
  347. helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
  348. helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
  349. helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
  350. helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
  351. helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
  352. helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
  353. helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
  354. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  355. helm/benchmark/scenarios/spider_scenario.py +91 -0
  356. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
  357. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  358. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  359. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  360. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  361. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  362. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  363. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  364. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  365. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  366. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  367. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  368. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  369. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  370. helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
  371. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  372. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  373. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  374. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  375. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  376. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  377. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  378. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  379. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  380. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  381. helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
  382. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  383. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  384. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  385. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  386. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  387. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  388. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  389. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  390. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  391. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  392. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  393. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  394. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  395. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  396. helm/benchmark/server.py +11 -0
  397. helm/benchmark/slurm_runner.py +1 -1
  398. helm/benchmark/static/schema_audio.yaml +752 -0
  399. helm/benchmark/static/schema_autobencher.yaml +150 -0
  400. helm/benchmark/static/schema_call_center.yaml +97 -60
  401. helm/benchmark/static/schema_capabilities.yaml +254 -0
  402. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  403. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  404. helm/benchmark/static/schema_enterprise.yaml +298 -0
  405. helm/benchmark/static/schema_finance.yaml +14 -12
  406. helm/benchmark/static/schema_heim.yaml +1389 -0
  407. helm/benchmark/static/schema_legal.yaml +566 -0
  408. helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
  409. helm/benchmark/static/schema_medhelm.yaml +1081 -0
  410. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  411. helm/benchmark/static/schema_safety.yaml +42 -6
  412. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +40 -26
  413. helm/benchmark/static/schema_social_audio.yaml +224 -0
  414. helm/benchmark/static/schema_sql.yaml +171 -0
  415. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +187 -30
  416. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  417. helm/benchmark/static/schema_vhelm.yaml +151 -47
  418. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  419. helm/benchmark/static_build/assets/index-262903c1.js +10 -0
  420. helm/benchmark/static_build/assets/index-42060d71.css +1 -0
  421. helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
  422. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  423. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  424. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
  425. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  426. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  427. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  428. helm/benchmark/static_build/config.js +1 -1
  429. helm/benchmark/static_build/index.html +5 -5
  430. helm/benchmark/window_services/default_window_service.py +1 -1
  431. helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
  432. helm/benchmark/window_services/ice_window_service.py +1 -1
  433. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  434. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  435. helm/benchmark/window_services/local_window_service.py +2 -2
  436. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  437. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  438. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  439. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  440. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  441. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  442. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  443. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  444. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  445. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  446. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  447. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  448. helm/benchmark/window_services/test_utils.py +1 -1
  449. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  450. helm/benchmark/window_services/tokenizer_service.py +0 -5
  451. helm/benchmark/window_services/yalm_window_service.py +1 -1
  452. helm/clients/ai21_client.py +3 -3
  453. helm/clients/aleph_alpha_client.py +1 -1
  454. helm/clients/audio_language/__init__.py +0 -0
  455. helm/clients/audio_language/diva_llama_client.py +118 -0
  456. helm/clients/audio_language/llama_omni_client.py +198 -0
  457. helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
  458. helm/clients/audio_language/qwen_audiolm_client.py +150 -0
  459. helm/clients/auto_client.py +4 -2
  460. helm/clients/azure_openai_client.py +55 -0
  461. helm/clients/bedrock_client.py +201 -7
  462. helm/clients/bedrock_utils.py +33 -0
  463. helm/clients/clip_scorers/clip_scorer.py +1 -1
  464. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  465. helm/clients/cohere_client.py +3 -3
  466. helm/clients/google_client.py +1 -1
  467. helm/clients/http_model_client.py +1 -1
  468. helm/clients/huggingface_client.py +10 -18
  469. helm/clients/ibm_client.py +267 -0
  470. helm/clients/image_generation/adobe_vision_client.py +1 -1
  471. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  472. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  473. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  474. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  475. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  476. helm/clients/image_generation/cogview2_client.py +1 -1
  477. helm/clients/image_generation/dalle2_client.py +1 -1
  478. helm/clients/image_generation/dalle3_client.py +2 -2
  479. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  480. helm/clients/image_generation/dalle_mini/data.py +1 -1
  481. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  482. helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
  483. helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
  484. helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
  485. helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
  486. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  487. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  488. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  489. helm/clients/image_generation/dalle_mini_client.py +1 -1
  490. helm/clients/image_generation/deep_floyd_client.py +1 -1
  491. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  492. helm/clients/image_generation/lexica_client.py +1 -1
  493. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  494. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  495. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  496. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  497. helm/clients/image_generation/mindalle_client.py +1 -1
  498. helm/clients/image_generation/together_image_generation_client.py +1 -1
  499. helm/clients/lit_gpt_client.py +2 -2
  500. helm/clients/mistral_client.py +62 -18
  501. helm/clients/nvidia_nim_client.py +0 -3
  502. helm/clients/openai_client.py +255 -21
  503. helm/clients/palmyra_client.py +2 -6
  504. helm/clients/reka_client.py +1 -1
  505. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  506. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  507. helm/clients/stanfordhealthcare_google_client.py +43 -0
  508. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  509. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  510. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  511. helm/clients/test_client.py +1 -1
  512. helm/clients/test_together_client.py +6 -1
  513. helm/clients/together_client.py +69 -7
  514. helm/clients/upstage_client.py +23 -0
  515. helm/clients/vertexai_client.py +39 -13
  516. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  517. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  518. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  519. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  520. helm/clients/vision_language/qwen2_vlm_client.py +175 -0
  521. helm/clients/vllm_client.py +4 -6
  522. helm/clients/yi_client.py +0 -3
  523. helm/common/audio_utils.py +111 -0
  524. helm/common/cache.py +8 -30
  525. helm/common/file_caches/local_file_cache.py +1 -1
  526. helm/common/file_caches/test_local_file_cache.py +1 -1
  527. helm/common/images_utils.py +2 -2
  528. helm/common/key_value_store.py +9 -9
  529. helm/common/media_object.py +2 -2
  530. helm/common/mongo_key_value_store.py +3 -3
  531. helm/common/multimodal_request_utils.py +26 -0
  532. helm/common/reeval_parameters.py +12 -0
  533. helm/common/request.py +6 -2
  534. helm/common/response_format.py +18 -0
  535. helm/common/test_cache.py +1 -48
  536. helm/common/test_media_object.py +1 -1
  537. helm/common/tokenization_request.py +0 -9
  538. helm/config/model_deployments.yaml +1258 -33
  539. helm/config/model_metadata.yaml +1110 -41
  540. helm/config/tokenizer_configs.yaml +403 -3
  541. helm/proxy/cli.py +2 -2
  542. helm/proxy/example_queries.py +1 -1
  543. helm/proxy/server.py +11 -13
  544. helm/proxy/services/remote_service.py +1 -7
  545. helm/proxy/services/server_service.py +6 -19
  546. helm/proxy/services/service.py +0 -6
  547. helm/proxy/services/test_remote_service.py +2 -2
  548. helm/proxy/services/test_service.py +1 -1
  549. helm/proxy/static/general.js +122 -0
  550. helm/proxy/static/help.html +99 -0
  551. helm/proxy/static/index.css +57 -0
  552. helm/proxy/static/index.html +40 -0
  553. helm/proxy/static/index.js +456 -0
  554. helm/proxy/static/info-icon.png +0 -0
  555. helm/proxy/test_retry.py +1 -1
  556. helm/proxy/token_counters/auto_token_counter.py +1 -1
  557. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  558. helm/tokenizers/caching_tokenizer.py +2 -30
  559. helm/tokenizers/http_model_tokenizer.py +1 -1
  560. helm/tokenizers/huggingface_tokenizer.py +2 -2
  561. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  562. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  563. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  564. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  565. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  566. helm/tokenizers/tokenizer.py +3 -1
  567. helm/tokenizers/yalm_tokenizer.py +3 -3
  568. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  569. crfm_helm-0.5.3.dist-info/METADATA +0 -355
  570. crfm_helm-0.5.3.dist-info/RECORD +0 -699
  571. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  572. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  573. helm/benchmark/data_overlap/light_scenario.py +0 -60
  574. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  575. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  576. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  577. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  578. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  579. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  580. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  581. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  582. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  583. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  584. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  585. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  586. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  587. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  588. helm/benchmark/static_build/assets/index-58f97dcd.js +0 -10
  589. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  590. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  591. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  592. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  593. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  594. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  595. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  596. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  597. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  598. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  599. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  600. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  601. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  602. helm/tokenizers/anthropic_tokenizer.py +0 -52
  603. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
  604. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
  605. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
  606. /helm/benchmark/{data_overlap → metrics/ifeval}/__init__.py +0 -0
@@ -0,0 +1,894 @@
1
+ crfm_helm-0.5.5.dist-info/licenses/LICENSE,sha256=bJiay7Nn5SHQ2n_4ZIT3AE0W1RGq4O7pxOApgBsaT64,11349
2
+ helm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ helm/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ helm/benchmark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ helm/benchmark/annotation_executor.py,sha256=LEehcWmkmqV_bFFzzmdm3GqsObJGCqoAYi1ekwG-yQ4,5757
6
+ helm/benchmark/config_registry.py,sha256=Cd25a8FHriUzAgvGGU5sBAPyhisdSIjdUJR4YbYs6T4,1603
7
+ helm/benchmark/data_preprocessor.py,sha256=wqGzAiLwOYa4v6TVPe6ayrnuzdNbmfjeiofRQiO2uso,2201
8
+ helm/benchmark/executor.py,sha256=simd7SdJ7TciUpoq3D0uz_XUSCZj5KIWCIP57FYm4js,4906
9
+ helm/benchmark/huggingface_registration.py,sha256=DAiHffNmo4H90rBfvQ_LHADtUCnCk6dfpI7Wbat1DZA,4389
10
+ helm/benchmark/model_deployment_registry.py,sha256=BjL0ghHgO7_Z5jZZ7kuSOj9saegI3BivaL-b699C0rc,9527
11
+ helm/benchmark/model_metadata_registry.py,sha256=7XisV0an_edM8hvP8LSoCnTeUN2QLJrQknOCA6-OE7M,8841
12
+ helm/benchmark/multi_gpu_runner.py,sha256=WmTKpVfcKXyiiPzrmxpbvQoZy0Ua8IyPgxB8r_3jrRw,4773
13
+ helm/benchmark/reeval_run.py,sha256=ApGc7T3koXPLhW60e4g8KNMbOfhUTMwMXMWIPcHXWGo,7211
14
+ helm/benchmark/reeval_runner.py,sha256=Qt9t47c6xJqGmkGYKfDLhf6idhQHThk_46fui9tsIwA,15593
15
+ helm/benchmark/run.py,sha256=eVtwVYvm51R-maimOehonn3IvJObGUFPbGvqoedykKQ,13658
16
+ helm/benchmark/run_expander.py,sha256=ZIVTmFUZlu9SJR0yTiNErOVT9-zSR-pU3cje8jdltuQ,55891
17
+ helm/benchmark/run_spec.py,sha256=GiIU8iGO2FGYFDWIxt51CeNPsW7rM7BzDqH1KgEL1cg,3217
18
+ helm/benchmark/run_spec_factory.py,sha256=lchT8iltTIYrkJ_uGAQkS5gmu9gvrZ-mVIkx2KhR10g,7728
19
+ helm/benchmark/runner.py,sha256=zlHDJ2Ys5-HxtXcwpkXcrdfXy_i886fBcq1iNeLyC3Q,14669
20
+ helm/benchmark/runner_config_registry.py,sha256=2gW5wBLkHdYb2WNbZulto06hTcto2ROvjy8HULw3jNM,515
21
+ helm/benchmark/server.py,sha256=_L5rb3NqtShQMkpESPKpo04KcMcRzR_ZQkWFokAb-nU,6344
22
+ helm/benchmark/slurm_jobs.py,sha256=eNCAoaWDfT0Wk32ZJRIGo-x8kgjhDPnPB4Xrvw_eLB0,3225
23
+ helm/benchmark/slurm_runner.py,sha256=RjmwMqMdKwOzd9B2S6fkuSqB2UjybmiSRVjraiLtzgM,16567
24
+ helm/benchmark/test_data_preprocessor.py,sha256=_esdtkqyU_8Yp5ZOO7n1b-Y4Qc28wpD5drG-4Y4UhIM,2219
25
+ helm/benchmark/test_run_expander.py,sha256=gLeHkNt_nLgbwEJiYxhwda-eKA3sJAxkYolCvgRN5TY,1163
26
+ helm/benchmark/tokenizer_config_registry.py,sha256=ZOImg38ta0FXZYAWna6q7A5xrG2mU7Ofr-8j4EqGlUY,1585
27
+ helm/benchmark/adaptation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ helm/benchmark/adaptation/adapter_spec.py,sha256=WrDOvQoeV5Ciw2bmvtnz6HTCAEfjCHTYgfTZwRZzkN8,5680
29
+ helm/benchmark/adaptation/common_adapter_specs.py,sha256=V8aYhQYuwohzwW0T_IU_ymGlxEwARKIiChLvwLKt-ew,12553
30
+ helm/benchmark/adaptation/prompt.py,sha256=vPCFeKVUwpbnTe0IbphkyAKFkkM0YnEONfvjcb8Hj50,2158
31
+ helm/benchmark/adaptation/request_state.py,sha256=WAPyubn35on-Ry7xKpXsVz3wYBMCMc_LidDOdcKxatI,3053
32
+ helm/benchmark/adaptation/scenario_state.py,sha256=mWEhgzk18SVoMEuj2pSnc_r9JrGAHLdOlteHJKUMA5k,1961
33
+ helm/benchmark/adaptation/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ helm/benchmark/adaptation/adapters/adapter.py,sha256=NbsvNITD_xByHxwknkHS_vWrzvO1peA9T1rjWbz_cy0,1239
35
+ helm/benchmark/adaptation/adapters/adapter_factory.py,sha256=hAcjDUqFg496FNaz6jftcObfoTuX1Z15r3-oKVdiyWw,3629
36
+ helm/benchmark/adaptation/adapters/binary_ranking_adapter.py,sha256=dvwirvz4dRzJHo2VpX1uGA8e9LN6F1Iy_zPkerKzO9A,5816
37
+ helm/benchmark/adaptation/adapters/chat_adapter.py,sha256=1Pf2XgdtrqAxbZPkUfw7TUH2lrulYoDTkC8Q0sckQHA,1852
38
+ helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py,sha256=dhDZANH5lyL5VdR_Ks72cNlP-NHbJqThZVP6xKHmXaE,5034
39
+ helm/benchmark/adaptation/adapters/generation_adapter.py,sha256=LI7uWpKIHvTUjGiygmjB_1HLk26vNkYYCBWIx0EEyL4,2180
40
+ helm/benchmark/adaptation/adapters/in_context_learning_adapter.py,sha256=QckQyDe_BvEj3sOZ65UEqR7rMcOVPEq7MREeE7DHrjA,15031
41
+ helm/benchmark/adaptation/adapters/language_modeling_adapter.py,sha256=u_GFEgg5wmpate-s5U5aMsmcHuFmreJcA8J0TO1kPCc,14907
42
+ helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py,sha256=-fY4mvzoGCCoR0HesT_xf2U2m2arVjgDuj59lm07_tg,1923
43
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py,sha256=lzmHwvDOHWl9IWC3NTLGfJDbduXtK_zrS2_YoUQmdc8,4464
44
+ helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py,sha256=RV6B3i5juBbJCtPDWzSfma49YXeDq3vQAQ5xQwnH-cA,3282
45
+ helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py,sha256=hhH9ehK092j1WdUwrKYSy5PvNJ73gsIu6-5W8aLoYVI,2190
46
+ helm/benchmark/adaptation/adapters/test_adapter.py,sha256=0-JrYnogZu4kENQG1eQMXHWnuSurCLRbkLpDuSnfRqs,745
47
+ helm/benchmark/adaptation/adapters/test_generation_adapter.py,sha256=Iq5q0HpBHrI3d2SodI0OwQ-COXuM7KvCjlBk_zNguNI,12868
48
+ helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py,sha256=HASZNtKXYWOOIMKVe16yokWNfCNJITJXoUhDLVkk-FQ,8048
49
+ helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py,sha256=-412yPKMylDMDXpbG-SlssXEjZlr3dshecrTFZoE-wY,11942
50
+ helm/benchmark/adaptation/adapters/multimodal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
+ helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py,sha256=MvE7YdIt8Y0nefXLskY9gPmXp7QWi2b8cqg8fxUpzbM,1980
52
+ helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py,sha256=KXP9MzDdmUao3uVjPgZYKjZQ_LvGHgZvI-86o3E87xA,6404
53
+ helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py,sha256=jyL61UxBsIr68hUz-jtjBUnyB2HBp5ESNyECGp_Gf6Q,2129
54
+ helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py,sha256=ftwSOTPugDuw8vh2WaQDJb0tQAeWR7S7qtD4yE_nOt4,4804
55
+ helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py,sha256=mjjyn9p31V-yt6S8BX7SvqvkQ56D9cKSff6d-daM6HM,10250
56
+ helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py,sha256=6nuz0Vn89A1mOedutsiq2SwTOG3qn8dUZTiaXhKffiw,3587
57
+ helm/benchmark/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
+ helm/benchmark/annotation/aci_bench_annotator.py,sha256=SjXidlbpm5HOhdhNXg3HjabMEQvt3hq1iJ5GPajxt8M,3228
59
+ helm/benchmark/annotation/air_bench_annotator.py,sha256=wC9LKP_I8XX-Qa9Na5OQes4QOYmJoVN9fi8tcXPlKuc,3500
60
+ helm/benchmark/annotation/annotator.py,sha256=__BkMVpAEpSs1pbwPK5sVWLdCAXnjsHcPYgmOqmNPu0,1843
61
+ helm/benchmark/annotation/annotator_factory.py,sha256=8uo5uz1UpIVCHUd7CRvmy6b9XB1gspdHmgxH5UZMPVI,2335
62
+ helm/benchmark/annotation/anthropic_red_team_annotator.py,sha256=4hob15m2k9e2A97E0aG9FstCbJ_oMM7-9y-nh2EaYqc,2395
63
+ helm/benchmark/annotation/autobencher_capabilities_annotator.py,sha256=TkW3xbcEuaPeGwuFrlu0YNSmj896WarmVT0WYL1it_E,4913
64
+ helm/benchmark/annotation/autobencher_safety_annotator.py,sha256=w_xjZmY1zuLjVvVbcbUygNvqcfn5dtwpXeV99yqm9aU,3914
65
+ helm/benchmark/annotation/bigcodebench_annotator.py,sha256=_p_keqJ6WwOGP7wTfNFY_zAADN3HUHHNpb8QenEkcQE,4449
66
+ helm/benchmark/annotation/bird_sql_annotator.py,sha256=mYK-2LeMzo9RrWzZFqaIRtn-1VyOe4ArCqBqtF6RAD0,2443
67
+ helm/benchmark/annotation/call_center_annotator.py,sha256=pTEjwfA4tgZhroFbamoQ8IO_D1O9r6k5GIlD50JEg5c,11601
68
+ helm/benchmark/annotation/chw_care_plan_annotator.py,sha256=LdY1GBQsU6O5z4KsVyan5z38vS6sNqpQak6ZacMmqfk,3073
69
+ helm/benchmark/annotation/czech_bank_qa_annotator.py,sha256=YIH5g4zHe3BQF2Y-6uRVw7g9u_SPBncqBobdvZdIzyA,3096
70
+ helm/benchmark/annotation/dischargeme_annotator.py,sha256=Z6xnUK1cNrFco9x0w8B_qhlLOEZrzXBwT6TKZPKoPBk,3676
71
+ helm/benchmark/annotation/ehr_sql_annotator.py,sha256=q99HGDcnG7_YcU47nK4Yi6ZoykURCNDWW6wIwQa5lms,4028
72
+ helm/benchmark/annotation/financebench_annotator.py,sha256=gNERLY35t2kcpayXGGrY4-pBs2jbEUomqElRYbb9nho,4150
73
+ helm/benchmark/annotation/harm_bench_annotator.py,sha256=zhkWnV3qZgY-nvHgQRHGrrCMC7605JwFHesY7UC3ZnQ,2293
74
+ helm/benchmark/annotation/helpdesk_call_summarization_annotator.py,sha256=ufvfXMTKcuk11Hfe3C7bEAyoqlqbrrv8D3hBEN3na6I,6057
75
+ helm/benchmark/annotation/live_qa_annotator.py,sha256=8DXsjwmeSyvC0kfp1uYds4cwpxqzF7FcskeZaXxXiOw,3552
76
+ helm/benchmark/annotation/med_dialog_annotator.py,sha256=OVTFIlvdhcOr_hdK0tnrDes9hYdN1mDWFTp4GDYY7O0,3162
77
+ helm/benchmark/annotation/medalign_annotator.py,sha256=8edAZh8oQgDKUT1bQ3Hp2NBE-QnBZ_-ZQjHkV7YKWhs,3240
78
+ helm/benchmark/annotation/medi_qa_annotator.py,sha256=v8e6hkHZX1x9KtTedCnpCseh-Y72z5kUgUrXHWPUkX8,3074
79
+ helm/benchmark/annotation/medication_qa_annotator.py,sha256=uZ3VpJ0nsDyF70_kn8kSSBPr4OlfiNdZC7q8wq_jJFE,3090
80
+ helm/benchmark/annotation/mental_health_annotator.py,sha256=JwgSeXtwf4KFZxNtAxsnqdLJQSvP-F-ZoCcCWdasrMQ,3275
81
+ helm/benchmark/annotation/mimic_rrs_annotator.py,sha256=zABO1FJH9pOFhUe5vc2B-c14Hf5RsuU9jQAGiMg6G0I,3204
82
+ helm/benchmark/annotation/model_as_judge.py,sha256=G6mDrbxNp4roC-smrhqZb5swt18Coa9b2-aJMPOaGuM,12116
83
+ helm/benchmark/annotation/mtsamples_procedures_annotator.py,sha256=qqWHY2HfCwMP5GqvObS3JpMIYVs4yyITCsA1B7lcDks,3201
84
+ helm/benchmark/annotation/mtsamples_replicate_annotator.py,sha256=TUxNzJcItErsw0gw76hiKZAWeQTNHGHnC0qf-_CGeF0,3316
85
+ helm/benchmark/annotation/omni_math_annotator.py,sha256=fAgABWlSEs8jnmNbd8RWbU7KNBP-a32kqxTWirs229Q,6207
86
+ helm/benchmark/annotation/simple_safety_tests_annotator.py,sha256=if4S8MaENr1HZ42ZsOjDPXZ-kJ0p4l4B2j9m994RuxQ,2140
87
+ helm/benchmark/annotation/spider_annotator.py,sha256=B48ylGg5J7xuTSUio7VztdXk3lI6ilMqrUvAD-ve0sE,621
88
+ helm/benchmark/annotation/starr_patient_instructions_annotator.py,sha256=5jU-dK_0OvB_jXNLDZtQ5E3gaSUcAxFNzv6prA17eAg,3186
89
+ helm/benchmark/annotation/test_annotator_factory.py,sha256=ifv5hxSbFe113AHeXLqTPkVJ-C2PW_gb9L3a0SHNi-M,986
90
+ helm/benchmark/annotation/test_dummy_annotator.py,sha256=LfY1ErJDUJ7rD8JUy92RUDD1b91jUs4Nk8Gvope-Z98,1644
91
+ helm/benchmark/annotation/wildbench_annotator.py,sha256=sk_GJnPeaIBC0frV04XNq3piOG7Hikn2bCF-_DqRe2A,5488
92
+ helm/benchmark/annotation/xstest_annotator.py,sha256=arL5DyA_nYkiSCAtl6G7MliZz5ZYRsyc7xQJNu0RBcA,3604
93
+ helm/benchmark/annotation/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
+ helm/benchmark/annotation/image2struct/image_compiler_annotator.py,sha256=iWqPDXscrXDkmzRGDg0o6ibmDVo5bQqvcWxZkr6P-d0,3620
95
+ helm/benchmark/annotation/image2struct/latex_compiler_annotator.py,sha256=drbxogMMGwGxgVFbhT7hxPGDh7uyhptlmEmeP1Gq2xM,2471
96
+ helm/benchmark/annotation/image2struct/lilypond_compiler_annotator.py,sha256=odIGciLX2oVq_O8_H15lWUZoSfVvY-jRb0ILjs7GCIg,4061
97
+ helm/benchmark/annotation/image2struct/webpage_compiler_annotator.py,sha256=w6RKv7Fz__j_abKXnsTn98kHPv9tWKipdLW3NVT55m8,6389
98
+ helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt,sha256=XtD4ysEHDHN1icMKSvBi7E69jG6NoVUkfGGdG0ccW4A,9223
99
+ helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt,sha256=KcSlBgagkCtY5A3boy4o4lsDdumLNDhwIET0vruGmhU,2050
100
+ helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md,sha256=zNV72MTHP0-Dz4lj7zwml8HHuekH6tkeDQUSzKSuehE,2380
101
+ helm/benchmark/annotation/wildbench/eval_template.score.v2.md,sha256=6mJzJHf56uSM8WCBs1V_12VRYLE6-5uXBFW72rDJf3s,2228
102
+ helm/benchmark/augmentations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
+ helm/benchmark/augmentations/cleva_perturbation.py,sha256=PJkJ_It_rvup_jWwVWmvCHHKR6csZDoCTFRsnZfEkTQ,29378
104
+ helm/benchmark/augmentations/contraction_expansion_perturbation.py,sha256=J097zFSqpwMwSvjwhkRT9BlAuq9ib6K-Ua1bkFkvvRg,4906
105
+ helm/benchmark/augmentations/contrast_sets_perturbation.py,sha256=frVnb8SaXtKu8ih37g2eGB00qspAIBgyl_m9YuxmunQ,3394
106
+ helm/benchmark/augmentations/correct_to_misspelling.json,sha256=L44RiJXlJCa6zQzTLf0MFHCOhFyRDRKfLQNXH-n3XIs,213429
107
+ helm/benchmark/augmentations/data_augmenter.py,sha256=Uk7rMDEgJGDoFyJLuOepjclBiNLB7Y3fATCH0HP_k_4,3847
108
+ helm/benchmark/augmentations/dialect_perturbation.py,sha256=Eas09Uo0435JnsgMdS4lBQ0hIC0aBnLZG5rg1Om1ef8,6303
109
+ helm/benchmark/augmentations/extra_space_perturbation.py,sha256=vDXptbwBzH31lNPgPBzNJ8GZVxA2Xpw_l1YA3XyUNic,899
110
+ helm/benchmark/augmentations/filler_words_perturbation.py,sha256=JTyciXOqHGw2e2TrUJlueFHUUyf4ORu053Yc3spd_bQ,2978
111
+ helm/benchmark/augmentations/gender_perturbation.py,sha256=D0t-o6w15QHJ8CvfzcB_KQcTsqhebpvL9dI4N8Oc7Cc,9443
112
+ helm/benchmark/augmentations/lowercase_perturbation.py,sha256=eCj8nt8mvNbLxHPZ7QOJuD9pzep09zXJaMnFXoqGIYY,575
113
+ helm/benchmark/augmentations/mild_mix_perturbation.py,sha256=nYbWNCuqh0uOAIYOQSiL-pO9MkiJSjUJ_13VwghmBOU,1955
114
+ helm/benchmark/augmentations/misspelling_perturbation.py,sha256=W3nARzIowF-fgWFeBF4fPgVLsjk-ewPgQTzf4LoTdiw,2200
115
+ helm/benchmark/augmentations/person_name_perturbation.py,sha256=EqxqhDfo5llXCq-QjEHIzfFWHRBFpeg8eBBPHu3jAHY,14408
116
+ helm/benchmark/augmentations/perturbation.py,sha256=vGQg8VHLv0qvd8rGqoSheuIwzv6kNFWiQqzmnMRsoBY,3908
117
+ helm/benchmark/augmentations/perturbation_description.py,sha256=VKOwBRPQY-0vuxhGvtac1Z5F10metPfpFnfs8ykFVmU,1184
118
+ helm/benchmark/augmentations/space_perturbation.py,sha256=6w7DjoyTZu5T0jWiAAs7OklAeOTQKRkTx4pjDy0U4RM,991
119
+ helm/benchmark/augmentations/suffix_perturbation.py,sha256=HGuxFHMsFyEdoz86X3Gx2dIHGuadKQaNbzaN3GljMn0,841
120
+ helm/benchmark/augmentations/synonym_perturbation.py,sha256=EHD9kOyG9CL5DoVjHhr_V3oTyF5xBc4h-Ve_Buuk-2E,4276
121
+ helm/benchmark/augmentations/test_perturbation.py,sha256=9V65K6mQKPq2DfK4qMaIq3kwFFOnuojJ5QpUKUQQ74Y,13562
122
+ helm/benchmark/augmentations/translate_perturbation.py,sha256=IgU8wHyQ748TyoYAeRv-0W1I5gT1WQUUCakelfsH7-0,1153
123
+ helm/benchmark/augmentations/typos_perturbation.py,sha256=C7N55rYHZxTgvcjOKLaQpYmeGENmwZdaUv5DBiMa4Bg,2854
124
+ helm/benchmark/efficiency_data/inference_denoised_runtimes.json,sha256=ios_dt-_8wtXvkVAx0iI2zwCxqHvk3XKTx31qHPalsI,4203
125
+ helm/benchmark/efficiency_data/inference_idealized_runtimes.json,sha256=5w7reeZc0yc4cjH8kJGxQQSoe8yaRVX2SSlSrx0QWFQ,12348
126
+ helm/benchmark/efficiency_data/training_efficiency.json,sha256=aH2moiBLStOLVi8Ci2KTK5ZkWlTBLK-B3fRfNZwhoSg,9763
127
+ helm/benchmark/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
128
+ helm/benchmark/metrics/aci_bench_metrics.py,sha256=X1HCoGfoOzcNRsnYcfdqs50cy-hZcxJYDwWK69LcMuc,1324
129
+ helm/benchmark/metrics/air_bench_metrics.py,sha256=VMNQDDEtz2CiK4U55lCHLz0b_DxHprTAZ1WtYtGXjcY,2282
130
+ helm/benchmark/metrics/annotation_metrics.py,sha256=JbXNleQsPJVF2uc1xXgUW2bzvJqwLPZyhnndqc6THv0,4268
131
+ helm/benchmark/metrics/basic_metrics.py,sha256=d0iwYnwrbF7w7CFtazx8vPIsZnj51U2PVVoscCb-HJA,20495
132
+ helm/benchmark/metrics/bbq_metrics.py,sha256=GeZhSSJzqGD0e5EAiRHitIC3XtPICF7rDI6GfeYQc8E,6201
133
+ helm/benchmark/metrics/bias_metrics.py,sha256=8qcInRJwQsuCI-lMC1umd-ZZaYvorUPrMjnuC6vSeb4,11602
134
+ helm/benchmark/metrics/bias_word_lists.py,sha256=mx5JjW3mHffXIqo4GcQN-zENUEttBqQnEjPTz3J3J_4,13909
135
+ helm/benchmark/metrics/bigcodebench_metrics.py,sha256=JcPZrSiHR-kxT-MFM8zXqOs6wTC5Hus3TbxuHFQVZow,860
136
+ helm/benchmark/metrics/bird_sql_metrics.py,sha256=ooCuXW5nPpRs_-4seCONQmn25DzTbcUgGXznXTK9y0Y,1153
137
+ helm/benchmark/metrics/chw_care_plan_metrics.py,sha256=E-TNRngWb_q7vVKs4QN8AfcpJndaKXd-XX0Ggxt96Qo,1341
138
+ helm/benchmark/metrics/classification_metrics.py,sha256=TjsD7RjkIn7VpTrWtt88aJUsiGVtlv5N06FJl8VaJ-g,8858
139
+ helm/benchmark/metrics/cleva_accuracy_metrics.py,sha256=1eDxHxVk-JW1mF9SBcuplIefAoi_edUwKpp-XxYbmeU,2740
140
+ helm/benchmark/metrics/cleva_harms_metrics.py,sha256=PILZDbVOeUflCFbs_6cE-3qaBt5vwL8R-BirbB2jTn8,11278
141
+ helm/benchmark/metrics/cleva_metrics_helper.py,sha256=8UwiGhekUmp7DxYWU4rxqX2v3ewkg-O5-jOh49iOGmc,304
142
+ helm/benchmark/metrics/code_metrics.py,sha256=SebQ5MXJe_phTiMfGMfhgYago-hwh_g9ctBWEHGqCnU,5230
143
+ helm/benchmark/metrics/code_metrics_helper.py,sha256=UNai154RuhYRZM_YK-rveLct4Ui5iEBNPYmYdKq34Xs,22712
144
+ helm/benchmark/metrics/comet_metric.py,sha256=qOvwE0ov1plb6SwwT3CbX1XuSo4GJ-M3iRe98yMiMaM,4797
145
+ helm/benchmark/metrics/common_metric_specs.py,sha256=JKqmO4ovBdfOYKC-00OSzOMv--g9NTCVfUHLaz-1Uns,6025
146
+ helm/benchmark/metrics/conv_fin_qa_calc_metrics.py,sha256=q7s6B-O11jVrRSAZDSoM3PzhksXCsoAwIZkPOXkiVFY,2663
147
+ helm/benchmark/metrics/copyright_metrics.py,sha256=_Lp7sKWgacY_13kFadNfnhrM2Ks8syBXnUW7zYuJkwo,7817
148
+ helm/benchmark/metrics/czech_bank_qa_metrics.py,sha256=bKoooK2T5v_fFKNbUnsuW6Mv9muAirJD5lTrzuHfpz8,1113
149
+ helm/benchmark/metrics/decodingtrust_fairness_metrics.py,sha256=x66XP0iQGk4ThT7ddmrlLCA0XF4arRbQMDT42LHf2kE,3297
150
+ helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py,sha256=TxTkkWdx6d6ym0MirZTiucl_TWFdn4uJLnlTfLjQvgk,2925
151
+ helm/benchmark/metrics/decodingtrust_privacy_metrics.py,sha256=OU7lka-hm6PubR5Gjj4uNyrqhjlfhe0mmjBCAz9vlRs,3456
152
+ helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py,sha256=BKDD3lblqT6Ebi5kEC4zbN_OvQwD1SdEtBv5Wf0kzWw,6460
153
+ helm/benchmark/metrics/dischargeme_metrics.py,sha256=Z5EOn6uvrOZCqUQeBK_mGWTzOCqJwewh4SAndeCn9CI,1336
154
+ helm/benchmark/metrics/disinformation_metrics.py,sha256=5n8wgRBb6FaDjqe1nR3Cj9aS48esmMsIUq4KpBHoQoU,7870
155
+ helm/benchmark/metrics/dry_run_metrics.py,sha256=Ss0lzf944HIbL1CX6QuJpGFPqOzhBT0qVWLNR1BoEjk,3784
156
+ helm/benchmark/metrics/efficiency_metrics.py,sha256=cLnPCvOzbUETOJh-lu65iNgYwVOOZAJO_s5iTUAd0MI,11852
157
+ helm/benchmark/metrics/ehr_sql_metrics.py,sha256=YRjvPIty7zlyoyGD6wo3HYOz7y_PThySOZzVRJ38iww,4797
158
+ helm/benchmark/metrics/evaluate_instances_metric.py,sha256=LGk1Dv_76Ak0YUlWKFTsOLEFiBSmcGVhNrbj_4zg9g4,2913
159
+ helm/benchmark/metrics/evaluate_reference_metrics.py,sha256=t0251_2aA0CrXB8oUBKlPRgPl-xYjzdVhLcGjwuhOgo,19621
160
+ helm/benchmark/metrics/fin_qa_metrics.py,sha256=MtXxGMGYiCiwCD1CclBXPopzly-Tz3zJTrXJaHYTXn4,2470
161
+ helm/benchmark/metrics/fin_qa_metrics_helper.py,sha256=sH5FIpsxxGUkXO21YGS2EtVsev1EdQ44lYoqFZPSSGo,11884
162
+ helm/benchmark/metrics/gpqa_chain_of_thought_metric.py,sha256=HRRKkcTbCu5ScOVwmjzYaA7UAEGE_AJUZVOCDRuv4Po,4321
163
+ helm/benchmark/metrics/gpt4_audio_critique_metrics.py,sha256=L9tGFwvl1-Ew3MdInQ7KPa8OlI5YexIB2KuCYVYsuPY,7023
164
+ helm/benchmark/metrics/gpt4v_originality_critique_metrics.py,sha256=1m7IWy9vu66svnmdBRjZQI-2YsGYzH2vXZMptlRGM0Y,5654
165
+ helm/benchmark/metrics/helpdesk_call_summarization_metrics.py,sha256=9-kB3NeBacI6nxs2oQ7Km_1SHyiz98UVZuR8PAlvCHM,1442
166
+ helm/benchmark/metrics/ifeval_metrics.py,sha256=iYj-880nHHXECC8t8B93f1LZL9e6PMB-M0nxRdRBZcg,2572
167
+ helm/benchmark/metrics/instruction_following_critique_metrics.py,sha256=RR9cMIG113oXUnBjU_denn7DaCGB11k1oGtQ5dQON3M,9874
168
+ helm/benchmark/metrics/language_modeling_metrics.py,sha256=yS7k8iFjxfkckSBA0RVA7VdOivSEBtNzCjczK6We7y0,4598
169
+ helm/benchmark/metrics/live_qa_metrics.py,sha256=f2XFmQaohjQNqYqNg8NcDVavCzyP4cd8Cl8rLArn9EM,816
170
+ helm/benchmark/metrics/machine_translation_metrics.py,sha256=22vaGBCSw12uM1wmtDG-MBBZW8OiTZwNPaerjckdtDE,3860
171
+ helm/benchmark/metrics/med_dialog_metrics.py,sha256=uDa4xQkU_-zv6WVC1gzoW6YVC1lSt7bzSD9kN0cmkE8,1329
172
+ helm/benchmark/metrics/medalign_metrics.py,sha256=TVqkqlQYuSePt_pG1jAJHhulrAhODFI2-hT4-RWhUkc,1321
173
+ helm/benchmark/metrics/medcalc_bench_metrics.py,sha256=9wZgg20-9QBNk0_XhuwR3LT940fqDPkCM4Kl0dPkbAs,5353
174
+ helm/benchmark/metrics/medec_metrics.py,sha256=hNBOGX52G_QOmgTCp9LnIMrmGSRxbb5vgjxKU069TMQ,4152
175
+ helm/benchmark/metrics/medi_qa_metrics.py,sha256=e8nZ7aMLgg7r088AQ7DBadQsncqCagkxpI81_X88qZA,1314
176
+ helm/benchmark/metrics/medication_qa_metrics.py,sha256=2UEH1DbcrDzWpTfLgKQC68_AhGhJ_igACvYWO2yvspo,1338
177
+ helm/benchmark/metrics/mental_health_metrics.py,sha256=T8Y2lQo0IZvDvsK7G30nnIN9djj_GlGK2CFMuFRBSBA,1344
178
+ helm/benchmark/metrics/metric.py,sha256=jqQyiKDq_pQv-ulGqfZI56ydRDQs3N3XhfHIPysUhrk,14311
179
+ helm/benchmark/metrics/metric_name.py,sha256=POhgmUqqIWh_LjCbYpiKkzGqqChBLeW3FADy9u_FcWw,1354
180
+ helm/benchmark/metrics/metric_service.py,sha256=mlX_MEFSYNzME6GFS3El_VVOvzPYnOMosKI0XIxygP4,1802
181
+ helm/benchmark/metrics/mimic_rrs_metrics.py,sha256=YPU0cwH12L0VqdLhXd12P-eKUDqn39z_sLFx3YdGrP4,1324
182
+ helm/benchmark/metrics/mimiciv_billing_code_metrics.py,sha256=Pu9efXoBrhsvxSeGHqwbUA5k365-pJTeXpMNhmcg0L0,3927
183
+ helm/benchmark/metrics/mtsamples_procedures_metrics.py,sha256=HfmNYyqHplEEM-ABzuMSL_vX92gFrZchO2DITl1Ukiw,1379
184
+ helm/benchmark/metrics/mtsamples_replicate_metrics.py,sha256=A0Ir6B0f99SwCf5KBGGUBFXCqV1Jo693BsYU_wIN3Ws,1374
185
+ helm/benchmark/metrics/nltk_helper.py,sha256=QMEps-lqJZ_pCgvjlMf4BvC0pzDu3ez5jit5F4p8dAk,1313
186
+ helm/benchmark/metrics/numeracy_metrics.py,sha256=3E-CMmB2wuGW5tLjmEm8wFMf85DJ1ZDUANfh84SQuP0,2906
187
+ helm/benchmark/metrics/omni_math_metrics.py,sha256=Gqih87UrE93-a0hbRhTBkjmfGLNTkuKQGaG-sTQeuG8,1287
188
+ helm/benchmark/metrics/output_processing_metric.py,sha256=ey9UBi2f3780OwFlp82ymzfjLR3MA2fpA9vW5R4W5TA,2581
189
+ helm/benchmark/metrics/output_processors.py,sha256=ULZlDBOf6NupAXzDKBKyTDdgPZ5PSxOAlOYTbrQEek8,472
190
+ helm/benchmark/metrics/paraphrase_generation_metrics.py,sha256=771CjpW5Ek00OCaCFfEsO6Cdy9eZb1fMlgWASvQgiK4,2025
191
+ helm/benchmark/metrics/prometheus_vision_critique_metrics.py,sha256=pexBbEFF3-bzWoPWNFuVs-3fm7XJw2EC4xgiSb3gSa4,8508
192
+ helm/benchmark/metrics/ranking_metrics.py,sha256=hSNKy4h7zRkGYSgo6RWt4PXQztA5ZX1PCJorVqpCvpA,17457
193
+ helm/benchmark/metrics/reference_metric.py,sha256=hseI7A16SOC8ymYZYFCL6nxnyxn0q9_Gywuvb1r9FLE,6092
194
+ helm/benchmark/metrics/reka_vibe_critique_metrics.py,sha256=CwzzQ13bBT0r_o75TqFj2Zr0ST9vzQi74K_ezWTnLCU,6568
195
+ helm/benchmark/metrics/safety_metrics.py,sha256=oARko_EwVnykBKYxi-w3ytKme4qcb1waz_0N2GKbSlg,3348
196
+ helm/benchmark/metrics/seahelm_metrics.py,sha256=egRkeXnnb8Nqi9qJJMDXJRSl4NK6WvdUxAc_LffBips,6964
197
+ helm/benchmark/metrics/seahelm_metrics_specs.py,sha256=cx8p4kwTuEOWxZioK9CVoeTNJT0fZjxRy_6_EM9F394,452
198
+ helm/benchmark/metrics/spider_metrics.py,sha256=RSrFJoA5SNcNxfmgVqCQixcSLrfJBYuVQw5jsfrc9Xg,189
199
+ helm/benchmark/metrics/starr_patient_instructions_metrics.py,sha256=lylCQ9rj1Y990bWr_h_BfYrikGGTuejxptTRZoXeaKM,1407
200
+ helm/benchmark/metrics/statistic.py,sha256=ATuOm0jU3L-0ELiZaF2GVMNF22W66-rMvzxRtlfqcII,3446
201
+ helm/benchmark/metrics/summarization_critique_metrics.py,sha256=-mki8-zvZx54dQg8X0BG2Y6wmfypQhkIuD_9ZjNBl78,4782
202
+ helm/benchmark/metrics/summarization_metrics.py,sha256=LNLGFi4DAKJEL0P60rnPlS_-yLMNLUprJbuJ6VsdL0g,16842
203
+ helm/benchmark/metrics/test_bias_metrics.py,sha256=qEZsCULvwjVdIyfNgJSc2L7Xp9suKKW7L5OuQmGrwZ8,6393
204
+ helm/benchmark/metrics/test_classification_metrics.py,sha256=CRDMGmVmzEUnNaM0C02qUTOU2AS11Mt2-GdEl89y7lw,9541
205
+ helm/benchmark/metrics/test_disinformation_metrics.py,sha256=U3ZmS9s33oimTQbKO-7pgWeX_WiDB9chlOCtf_vslXw,2249
206
+ helm/benchmark/metrics/test_evaluate_reference_metrics.py,sha256=B7xtDDWPAxF7d-vcUx_R51hFMae-DD52nUwbu_eWt6Y,1601
207
+ helm/benchmark/metrics/test_metric.py,sha256=0sGlXE3_Al_VyKpOPBhQR_xT-XrcVgGepLpwut37DmA,771
208
+ helm/benchmark/metrics/test_numeracy_metrics.py,sha256=ls1ZIHDePKpHMoqAbf4HmJ1SIBjLFuLIzGbfg6OiZvM,4162
209
+ helm/benchmark/metrics/test_statistic.py,sha256=yK6m2BZ5UXWmb2D1cQzDH_2ELvrNDaR_lyzX4WoHw9Q,1273
210
+ helm/benchmark/metrics/toxicity_metrics.py,sha256=SkVp91-LnZMa5ouEspcYa-PYvPGMi4H_bU3uuc6ve5I,4115
211
+ helm/benchmark/metrics/toxicity_utils.py,sha256=-bfittLtMkHyV5wu-hj6KVtaiNGgVIO5duUmThBlX8w,988
212
+ helm/benchmark/metrics/unitxt_metrics.py,sha256=OBJ6Ph-4mpAOAEcl4loBemcEQjz-UrbmPVnWmzifhG4,4863
213
+ helm/benchmark/metrics/wildbench_metrics.py,sha256=sY7MNTzRlJJK3yph3rCijgbMaajtLyCCquThlsoE5wU,1380
214
+ helm/benchmark/metrics/ifeval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
215
+ helm/benchmark/metrics/ifeval/instructions.py,sha256=qNoa1vMPDNz6ORWfyMv_efwKZ4U5zkI-cf4aApyfSqU,53247
216
+ helm/benchmark/metrics/ifeval/instructions_registry.py,sha256=NprvkRQz0QWaIpJsFp95CQCWsnuY_57ZSqFn2IISDP8,7555
217
+ helm/benchmark/metrics/ifeval/instructions_registry.pyi,sha256=ryH3Jimbvk9T0PtxTN6TPXv476ukLVJtTcQWYXYYtp0,63
218
+ helm/benchmark/metrics/ifeval/instructions_util.py,sha256=VhkJfZLCaHi094rZSoeQbok7-Q-IH5gHfAYnOs7geeo,19787
219
+ helm/benchmark/metrics/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
220
+ helm/benchmark/metrics/image_generation/aesthetics_metrics.py,sha256=UqjBgAi1ylegvHBjALJ8vxINhHEqqr2fSvN9lXgyIZk,2140
221
+ helm/benchmark/metrics/image_generation/aesthetics_scorer.py,sha256=ISdThDKMrx-SHQe69dCcr8qUrMCa_GsxX3BeZnd0WPA,2538
222
+ helm/benchmark/metrics/image_generation/clip_score_metrics.py,sha256=tUnAoew24jjjbjPaoE2-4iyRTq6YNW8Xfk1p5JWZkAU,3338
223
+ helm/benchmark/metrics/image_generation/denoised_runtime_metric.py,sha256=Nom_yw15ePU7wUuV2DFHpLnEAqaZQjlkW9LowRElOAI,1646
224
+ helm/benchmark/metrics/image_generation/detection_metrics.py,sha256=mfYoPbLCmqWxqMSXbcX6TM0niNnpCeipcHImuV3mZ3c,2160
225
+ helm/benchmark/metrics/image_generation/efficiency_metrics.py,sha256=neeNJNtHAVUMWqr5rvRIRlPKl225cXUGCURLB0z-rKQ,1459
226
+ helm/benchmark/metrics/image_generation/fidelity_metrics.py,sha256=Vewml_NOcM2jK-yyKHWsHB0KC3NVG8HfweA4rGZ9RAQ,7583
227
+ helm/benchmark/metrics/image_generation/fractal_dimension_metric.py,sha256=-WtHsMKiUolekyBBLKtONF8NdwCpIPSNxeGS6CEZxHI,2135
228
+ helm/benchmark/metrics/image_generation/gender_metrics.py,sha256=j_sHhAkq1fA2VL483OX80cC9EQjzOIWGHQAeGVEN8fY,2371
229
+ helm/benchmark/metrics/image_generation/image_critique_metrics.py,sha256=fVgb-GdA9QmNywq7byNlG1TTxb_zvzP_bcURruZ54Wo,12958
230
+ helm/benchmark/metrics/image_generation/lpips_metrics.py,sha256=naVxg-yXdXum-yQD6MgRcemVr0L567Y2drGjfehravQ,3582
231
+ helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py,sha256=z3xaiKrfarF-lfQ8Aa-Dveaun3LfMMpQIWR8bHbQIrY,3507
232
+ helm/benchmark/metrics/image_generation/nsfw_detector.py,sha256=X1hsWRBa-1KOhT_TTfCk9_jsXXMXHadAxddsFmTKQHc,3914
233
+ helm/benchmark/metrics/image_generation/nsfw_metrics.py,sha256=ZAyd5n0yt2fj30vBiXDPHmEQBiMgr-5G8JeCeZnwrvY,4665
234
+ helm/benchmark/metrics/image_generation/nudity_metrics.py,sha256=KQG-jybgaWuXB5rL-kLBWBD7gdZKWdTJYYrAEEMnZAU,1553
235
+ helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py,sha256=0bwfJZ5pOAQFec4TEf_eUd9qQxwximwzDrYFkZnO9Yk,6951
236
+ helm/benchmark/metrics/image_generation/psnr_metrics.py,sha256=VLq9gOkaoIZNAGII9fMI3tOCMpKAzbfLuqGbWo_mOV8,3126
237
+ helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py,sha256=veb_QJdWiwm8HRElSBI4qwqthyMW_QNO0f3z7xHFoLs,1963
238
+ helm/benchmark/metrics/image_generation/skin_tone_metrics.py,sha256=Ikxq4nsDhfaGcIZTblT1pCRxMfCt0G8NNDnCDJohsnU,6023
239
+ helm/benchmark/metrics/image_generation/uiqi_metrics.py,sha256=NaBxJvZJdNw8g7Z6jxUC3mtUhdPXsib45TSK2fr5DUM,3904
240
+ helm/benchmark/metrics/image_generation/watermark_metrics.py,sha256=pK_076GaxMoqG6-SvQG60uKQ3z5n84OwG__gK0GYs6s,1924
241
+ helm/benchmark/metrics/image_generation/detectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
242
+ helm/benchmark/metrics/image_generation/detectors/base_detector.py,sha256=e4c8vPfioGzl2ftYzWOFIBDJcZJxBmpjU13n4fXaSvY,226
243
+ helm/benchmark/metrics/image_generation/detectors/vitdet.py,sha256=kxXS8uNBC0pQ7LatuN85CXU8pJHZn0pJXY0rOLd_39g,7526
244
+ helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
+ helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py,sha256=vFO6s8QHo6Pt1QfbOKAI0m3mJrc0BeH1Hcf7u2uWMIk,2116
246
+ helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py,sha256=5qKL-gHnEVmzSDW2GKDq6Uox_EJMDLe0QA55Nrl4H6s,1472
247
+ helm/benchmark/metrics/image_generation/q16/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
248
+ helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py,sha256=8Y5h-6RMjivm50RnNbNwV7wCug4RhKT5g8R_YeEp54I,3467
249
+ helm/benchmark/metrics/image_generation/q16/test_q16.py,sha256=aDas2UJ6N8Mqq7jISXkMkrypDTKyAUL-6qO9paervCw,828
250
+ helm/benchmark/metrics/image_generation/watermark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
251
+ helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py,sha256=Ir4u8blJWTRtEBogb6u22qCy3JXAIzvx-Th6dSBLfdw,698
252
+ helm/benchmark/metrics/image_generation/watermark/watermark_detector.py,sha256=w6WnTc6t6zx0W0gTjgedXC9OO5dq5iWpx9UcnioKml4,3641
253
+ helm/benchmark/metrics/summac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
254
+ helm/benchmark/metrics/summac/model_summac.py,sha256=nDB8sAJNnQ5TkBKoQBGUejFK6ynrRiaV4oyEKcm3qkg,17488
255
+ helm/benchmark/metrics/summac/utils_misc.py,sha256=7_Q1c72cKt8PWtxn8u4R8nB53HK6_JF2nP8bBXYNk-A,1485
256
+ helm/benchmark/metrics/tokens/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
257
+ helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py,sha256=XDZGK8h84F2w_pK8Zjko8ssKZmVxKFqTOuHL0mLBzMY,694
258
+ helm/benchmark/metrics/tokens/auto_token_cost_estimator.py,sha256=HtL3FtgDK1KPjs2FhH-FbmarT5jMbfx7ZQODrmRFA9k,2148
259
+ helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py,sha256=i715T4OW9yng-eJjsb8Qip5JFuEl4x-k9adnq7O164w,552
260
+ helm/benchmark/metrics/tokens/free_token_cost_estimator.py,sha256=PiraoV3WtAYtcF5NM9sFEGHrFSxMqasdJDVgDIgk80U,490
261
+ helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py,sha256=sa7Cu0S9IPF35puSVU-gYnLg1uXEZYAdRyKmCc-_5ss,1549
262
+ helm/benchmark/metrics/tokens/openai_token_cost_estimator.py,sha256=CovkJ4zeVn89bjno2gP0K8ix_Ie0EC2tUJLHLCEl378,1427
263
+ helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py,sha256=n9f2rcgaNHROORvSYjULXC_LEA4KZZjs8wASk0vAG7o,1100
264
+ helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py,sha256=Gk1ihZsT-EhcTyMsbmNWPP2Z6FlS4nRYOpq7v41f3j0,2657
265
+ helm/benchmark/metrics/tokens/token_cost_estimator.py,sha256=fTGUfhHV6yMwpTkCEMTGMxKO8jskqJz4sAtwXT6M_C8,425
266
+ helm/benchmark/metrics/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
267
+ helm/benchmark/metrics/vision_language/emd_utils.py,sha256=KdZdcqu3eo016FdAjAm_83v92-wWuR90EPsTogfTcok,15196
268
+ helm/benchmark/metrics/vision_language/image_metrics.py,sha256=RgKAn7ftl4KCZ86V3zO_LUstNbc6Lla-0hdQq77JDXw,23841
269
+ helm/benchmark/metrics/vision_language/image_utils.py,sha256=4E0NYh09O6-5sGhAPo6KZqYaZfBpCtuYbD3vLt-wQzk,3755
270
+ helm/benchmark/presentation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
271
+ helm/benchmark/presentation/contamination.py,sha256=PiIdcaD3-xfExjOmyL5q4Ao2ASa-OlScJAB9u1Zxe7o,2811
272
+ helm/benchmark/presentation/create_plots.py,sha256=T6ewj8rXZfRlqg01bgbhdU1rsABK4xyrLpruhRG-7Fc,28691
273
+ helm/benchmark/presentation/run_display.py,sha256=tC1DciLvDTQJog4BDo8StWDdX7DbBkhrG2sX_SwXSPQ,11838
274
+ helm/benchmark/presentation/run_entry.py,sha256=J1QgLOP99N7N4bs7nzXWxyU3pOd-a1j8xwL9ag1nP_Y,1158
275
+ helm/benchmark/presentation/schema.py,sha256=cblGmgkhuqQRWPh-IT75u3Il_-SVXipeq-mh64lvgWY,10947
276
+ helm/benchmark/presentation/summarize.py,sha256=Y4rNMgnQYEwOKX8Syd9R0HybjnaW_tJQZcWF4ZFrHvc,59749
277
+ helm/benchmark/presentation/table.py,sha256=-foH1BIfMiD6YvpwoGJ910CH7Hib-_pYtHH1hE8zwNc,2904
278
+ helm/benchmark/presentation/test_contamination.py,sha256=RlihBOF6vx2tKEj6_EMnJojTYoStx0FUeJSLT1bdf8w,509
279
+ helm/benchmark/presentation/test_create_plots.py,sha256=5PPPegMTdBZurxyyUxI4rN13AVsjV3eQrwFqlobJ8UA,1286
280
+ helm/benchmark/presentation/test_run_entry.py,sha256=4n484sSYT0gQ4WVt67Fs3ctKa4vi97hI32O5XXxGY1o,794
281
+ helm/benchmark/presentation/test_schema.py,sha256=6mq6CeAOLW2Kxi1lX_ZW8QCVqVR73XImR8ylcRGFkBE,378
282
+ helm/benchmark/presentation/test_summarize.py,sha256=GzZNwBDybpstzl6wT0Rgqn75N9iCNrUIzrdjOfUolu0,6317
283
+ helm/benchmark/presentation/torr_robustness_summarizer.py,sha256=SmMOZWCQ-KaJBp78otwvAeE1btWignyWalaQ8QG87r4,8242
284
+ helm/benchmark/run_specs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
+ helm/benchmark/run_specs/air_bench_run_specs.py,sha256=K86SqpINMBOiLIpuHz-jwlQL3SrH6n6WbqjD90i4LQQ,2231
286
+ helm/benchmark/run_specs/audio_run_specs.py,sha256=g6uncT8dIK59qU3aEcyvpPLiblx3Ks8yCUY6s8IMO9U,21957
287
+ helm/benchmark/run_specs/call_center_run_specs.py,sha256=QhRQw91WblB9UaB319XNCO5K8PX8Riiza41Ym-1CcRU,7044
288
+ helm/benchmark/run_specs/capabilities_run_specs.py,sha256=sbqhIj4AoujV45erwoVK61lWdlkjg4qssmGlu0eSr1U,12067
289
+ helm/benchmark/run_specs/classic_run_specs.py,sha256=1NYeYIwC2F7EjkPEPxNoFb3Ap6BUcUJK_hxBKq4lzt0,56144
290
+ helm/benchmark/run_specs/cleva_run_specs.py,sha256=lEIHEqQY3Efx-sl2Z6Rq9Qq_1HEWHqFYuUkZbGvq66s,13387
291
+ helm/benchmark/run_specs/decodingtrust_run_specs.py,sha256=7slILDS9f0_Z0y-Pz5xEspoGQUmOCOI2K2r4XWUVsm8,14428
292
+ helm/benchmark/run_specs/enem_challenge_specs.py,sha256=5UWeP2bsnwCHMMXI3DFRMUPKcnJ9_EL01qPUthbWIvE,1351
293
+ helm/benchmark/run_specs/enterprise_run_specs.py,sha256=J6NbdgDO3sFvBf-Zqzug05T_JbFIk1Vx50QmkDG6QHc,8966
294
+ helm/benchmark/run_specs/experimental_run_specs.py,sha256=pqK3_yD_2Qw1OWjj0biiV1G2BlZhAVnTPcUEbLnz2Wc,6765
295
+ helm/benchmark/run_specs/finance_run_specs.py,sha256=5mwb7GbAcSLVZiumqCiAr9dr8qBYApkEt5Oben5CFXs,4371
296
+ helm/benchmark/run_specs/heim_run_specs.py,sha256=Pt1eVbzvwZ5EXq8WB2b3XYw62SWYN_i1P_H3oE4i8KY,22096
297
+ helm/benchmark/run_specs/imdb_ptbr_run_specs.py,sha256=nkW5A_xeD5kCKeJVxsL8RFS8r3UpP_WCcwSdMh2s850,1215
298
+ helm/benchmark/run_specs/instruction_following_run_specs.py,sha256=GElJhgbQhlZMYSAM4YyGcYq0pqycR32kBCoHqG6m-ZY,4177
299
+ helm/benchmark/run_specs/lite_run_specs.py,sha256=8OkL9g3wQBG96g0ijGZ9L1Trb59b7VPDyYMqvA3hXfE,11129
300
+ helm/benchmark/run_specs/long_context_run_specs.py,sha256=A1ysL7pmyvCmfnokZaBSyWEKfpMh9XCaOejFqABwL38,3033
301
+ helm/benchmark/run_specs/medhelm_run_specs.py,sha256=97fWtee-VpnS9ydudc3285kHayK-JYhIM5j6qZX6p8A,40440
302
+ helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py,sha256=kenpGGMK1XXaNtvNXsshPvdvN9ubv1sOfaPdjFM4obA,2034
303
+ helm/benchmark/run_specs/oab_exams_specs.py,sha256=ws7Vppo_zJvxKqQ_sNhm9N7-5eQbX2CBkcDI5c_sRG4,1658
304
+ helm/benchmark/run_specs/safety_run_specs.py,sha256=3X6tYaq2SlRsZs9q6SCtBUgjNEpOwUtV6M7iY2Kowm0,6807
305
+ helm/benchmark/run_specs/seahelm_run_specs.py,sha256=R3mg4_OoaRizZ5n0FHcUQpJLny3j-ulBlHzOyF0a0Ok,23904
306
+ helm/benchmark/run_specs/simple_run_specs.py,sha256=0kK_e8U4JUWZ6wO4N-GPFRE1iGT4ilvSMUGfirvpIE0,3837
307
+ helm/benchmark/run_specs/sql_run_specs.py,sha256=JWCICELKi81m11MggyR6CJNl3vpWPwk4kr8DZSsWvj4,1965
308
+ helm/benchmark/run_specs/tweetsentbr_run_specs.py,sha256=qogc-fb83Rh1DooKKaskhak52ycvu8DAnhabw9rc7yA,1129
309
+ helm/benchmark/run_specs/unitxt_run_specs.py,sha256=4Vbsq0MPpSe4cIJOXzeVpMm60N9Qafa2R85X5BeFQew,1873
310
+ helm/benchmark/run_specs/vlm_run_specs.py,sha256=qjo0YzyIKq8UIbkKIUhHYh7iErPDQSG76_m-5kiPKEc,36648
311
+ helm/benchmark/run_specs/winogrande_afr_run_specs.py,sha256=dhOm8z6Q_ZpnzYKrsS0nEbRQPWs_phkXxmL5pxCJzQA,1853
312
+ helm/benchmark/scenarios/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
313
+ helm/benchmark/scenarios/aci_bench_scenario.py,sha256=u-Vo3J16o1d3GPb3qqclYv4mzSaPOa_RblmZbYV_xik,5345
314
+ helm/benchmark/scenarios/air_bench_scenario.py,sha256=B6_WMowLFe4gWfnoFA_yrHe0kagbIkZabEnK4kGGqSU,1884
315
+ helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py,sha256=jnUGbppDGEsbe5xoJjmv7nW_RvwPIYm6cwSULeqk2Fk,5133
316
+ helm/benchmark/scenarios/anthropic_red_team_scenario.py,sha256=_OWE33eVRaZI0gmfP7bLd572uOi_6jb39z_J6nkcvfg,3182
317
+ helm/benchmark/scenarios/autobencher_capabilities_scenario.py,sha256=cx5Um-crMG3cNHc8tX51r--K7sVYyM9WbhEqnrb9bag,2503
318
+ helm/benchmark/scenarios/autobencher_safety_scenario.py,sha256=MFt3f5baN5r-FmzWZfUChGR1mX_PUB_5hxoINac_Whs,1854
319
+ helm/benchmark/scenarios/babi_qa_scenario.py,sha256=SyM6RP4v08B1PjumkdQnuKrM9L8SyK0bXbx-LgmyTPo,5067
320
+ helm/benchmark/scenarios/banking77_scenario.py,sha256=dtiM-Q_pMDWqkLi-hgl0tH-aGuDdgHkXgweE1JqrPYs,1883
321
+ helm/benchmark/scenarios/bbq_scenario.py,sha256=2A7MX6iMAZHuPpH9RePi9rVBeFRmGdiE6GlqZ5uNdAM,9603
322
+ helm/benchmark/scenarios/big_bench_scenario.py,sha256=g1TLoDTYQAe-efzQnV9J5UBCaUfN1jWTTjTd-ZJQmVQ,8146
323
+ helm/benchmark/scenarios/bigcodebench_scenario.py,sha256=zQLv91uwfGAR9N4jm_iBUmYOVFj9cL14Nj8aqoCqUM0,2004
324
+ helm/benchmark/scenarios/bird_sql_scenario.py,sha256=n5elzanKEX9YclAl2l1y33aCjihTmaw1VF_ZsAU5IaM,3613
325
+ helm/benchmark/scenarios/bird_sql_scenario_helper.py,sha256=FIwPk-dwfTY-8gDXeAiTZbfbS0Oe1OuWRlYiJOhZwk4,4664
326
+ helm/benchmark/scenarios/blimp_scenario.py,sha256=9Ge3QKRgtVHpWy7aehZVKiO6JrsxK7zrEdtqAb4zxtQ,6284
327
+ helm/benchmark/scenarios/bold_scenario.py,sha256=iE9drB9IeXfRn3xvLnaQi3-nJAp-bV1RE0GJGnp9dJc,4130
328
+ helm/benchmark/scenarios/boolq_scenario.py,sha256=wPETIu5jcI4jgP5GoFa_xi4SsvHtS9gxQ5TD8neHmdk,8037
329
+ helm/benchmark/scenarios/call_center_scenario.py,sha256=19J2N57WnUkPMGRRbJyZak8YCeMTRwD3BRK1SArQlL0,3037
330
+ helm/benchmark/scenarios/casehold_scenario.py,sha256=QSe0D3KQJhlTOo6kM9OHwdKy6NlclsFGRVCAB3mTG7s,3174
331
+ helm/benchmark/scenarios/chw_care_plan_scenario.py,sha256=y0B1g8wMyjK7zncJjaHUBSbvIK_4DNiAVE-Xk8KBsP0,3695
332
+ helm/benchmark/scenarios/ci_mcqa_scenario.py,sha256=slZZT74QI3OMQAgT-ybcR_xVcRDoopXw6mMu4iy3XCY,3074
333
+ helm/benchmark/scenarios/civil_comments_scenario.py,sha256=pnZU2U_cYFYOJmlmwTehHU5oLIPx_Yg8Ayxinroh4IQ,4875
334
+ helm/benchmark/scenarios/clear_scenario.py,sha256=-r4YIQLKgbjT54J96urcxEcQ1bhxjKVtfyajuFOaEoA,5915
335
+ helm/benchmark/scenarios/cleva_scenario.py,sha256=yPIiToKow76YMc0EDYeqQEPx-9a_6Bm3w4S1IsRRV5E,57987
336
+ helm/benchmark/scenarios/code_scenario.py,sha256=lSbZWw67ie9osOjXDZukj3EEZGa3L6TrMvTg--IbuxE,12520
337
+ helm/benchmark/scenarios/code_scenario_apps_pinned_file_order.py,sha256=KC-5MQ-d8Nn46aDN4FaPxmd6yk1DtVUmVR-CIZsNCp4,1738
338
+ helm/benchmark/scenarios/code_scenario_helper.py,sha256=TnXAlY-wdAFwIDylFItf0z7HOu93WD6dNThwzZYe330,5904
339
+ helm/benchmark/scenarios/commonsense_scenario.py,sha256=yZ6n9aqOi7UWY3q4uTDNc2JRNZxaBZPIp7n_Snt_8g8,9511
340
+ helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py,sha256=gKEwqHDD8KlKmW8z3xAxSIGmALTXrRRPcoDUzbv_IXg,3854
341
+ helm/benchmark/scenarios/copyright_scenario.py,sha256=FHzUYEabj-BTKl90fgq7jSCq5_Yf9cO9MA9djn50B1Q,3697
342
+ helm/benchmark/scenarios/covid_dialog_scenario.py,sha256=Vnxfn6EKwN-KR1vH-x46YHUC5jf7UAOv7zsnXVHYmZY,4032
343
+ helm/benchmark/scenarios/cti_to_mitre_scenario.py,sha256=pfHAteKXLNUrhKyAYk6m9j-d7iuEgz58o15xukp_GFQ,10260
344
+ helm/benchmark/scenarios/custom_mcqa_scenario.py,sha256=rgdHsSh8QknlcdGfZQ4VvqBUMLfTTHaNolCv4QgWHzE,1939
345
+ helm/benchmark/scenarios/czech_bank_qa_scenario.py,sha256=ZBfkUYlIa-BagRVBf97RoyLfEloAjnM0RPv5wmEWueQ,4406
346
+ helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py,sha256=vOUE5-rj_Wr6m7n76knte-kCMsphb-SSq9LraYf-Dh8,7933
347
+ helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py,sha256=9qo3l44aby1EfQqohh1M2DVtHXqY1fuvj1XT3_n4hBk,5588
348
+ helm/benchmark/scenarios/decodingtrust_fairness_scenario.py,sha256=rAOZnFSxO3ENOvcNz688P_f3Y7NzdwiWgoYTNvAaw3A,2866
349
+ helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py,sha256=qhzqW614WnsiyN7TiHUdZY_NpEdW_iMO0AMrLK8DmK0,14116
350
+ helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py,sha256=RSigvRdqjeFTwFfXNmslz8zyAGSmLf6UtBDA4NrQBCo,8304
351
+ helm/benchmark/scenarios/decodingtrust_privacy_scenario.py,sha256=zaXn4sRPUEZiqPoudiDT1xHMV2DaiEXOOTz3qB5q7Go,20143
352
+ helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py,sha256=NjutVTOVVze-IJniRFecz8gqh_BUpuJG3-BUboTGKRw,2933
353
+ helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py,sha256=EYKoXDWMesbY5dCNY-N0eYMRL0rjEfGsuS_TkeD3Suo,2952
354
+ helm/benchmark/scenarios/dialogue_scenarios.py,sha256=yXCMZegzlgL0CXTY1W5lXdkFFHicUvq4z7_284MfRpw,5778
355
+ helm/benchmark/scenarios/dischargeme_scenario.py,sha256=sTh3bj3dqvh20FCn7bGMycFzH5xphgespVS063XZ_Wg,6759
356
+ helm/benchmark/scenarios/disinformation_scenario.py,sha256=0T7LhXguzBP645Fruc2udfTaMuy7XGtOEMJKpFMIFRk,8565
357
+ helm/benchmark/scenarios/dyck_language_scenario.py,sha256=hygFPTcICGUEPwjtxULLKBSbuBOXLYpozIgiGcT__W0,9379
358
+ helm/benchmark/scenarios/echr_judgment_classification_scenario.py,sha256=IqODoUY1-zJD1KW4Qkg3VwJcUeeLgGUKThr62bW-wx8,4915
359
+ helm/benchmark/scenarios/ehr_sql_scenario.py,sha256=ufrY7zmeXlgOxsq1Sr0x0vhR7xbL6FTJJWiM0pzwIpg,5119
360
+ helm/benchmark/scenarios/ehrshot_scenario.py,sha256=ROPfWBDOAaHxcnnh5eGkCh-qhwvpxORcGmpA8DrjD0A,68721
361
+ helm/benchmark/scenarios/enem_challenge_scenario.py,sha256=sxYXKvf-mGNqctTkemwI9rrA_Rg2xA8mz3_W3TIfzUE,2147
362
+ helm/benchmark/scenarios/entity_data_imputation_scenario.py,sha256=4V426oOuexGg59q0djHCTQjQmqYgyLT191Z5fayubmU,6681
363
+ helm/benchmark/scenarios/entity_matching_scenario.py,sha256=kzzDaoVikL2P7Z-17EkLIVR_W7IHcNVerUts2oXDKLA,7111
364
+ helm/benchmark/scenarios/entity_matching_scenario_fixed_random_state.py,sha256=TklbX7Kx4y-estV-YHUbI5O08q2qCZRrOmX9D3gZS9c,2193
365
+ helm/benchmark/scenarios/ewok_scenario.py,sha256=vrbJg9vakAxE6n-1jURUcwb-ihrsYoY9e32BpnEGDaQ,4684
366
+ helm/benchmark/scenarios/fin_qa_scenario.py,sha256=Dm_kGOivaxiKVhcqFgN8pRPs1eqm2LdBZxWy0yFhFuE,5958
367
+ helm/benchmark/scenarios/financebench_scenario.py,sha256=cHMljdg0_9HA3FbwcwwMt3DR9rxl0jkyFN9jNrUStSE,1956
368
+ helm/benchmark/scenarios/financial_phrasebank_scenario.py,sha256=dMTfI9MRHKXnECsXOIY8xvX6w5vAPEIa6A7TYyIu2Fw,4457
369
+ helm/benchmark/scenarios/gold_commodity_news_scenario.py,sha256=-O4ilLwNcycmpQG5h_5WtQP7yJEr4mjWjKBe2eNP0uY,4806
370
+ helm/benchmark/scenarios/gpqa_scenario.py,sha256=369E0JvaR12EcgcEFKKRcDw1iztt4sb8ghIsk9Brzi4,2884
371
+ helm/benchmark/scenarios/grammar.py,sha256=Pb9vEP_0Ki87UdQCj1ym7QWJ24M4DRP6TXB5d3GnhLs,5597
372
+ helm/benchmark/scenarios/grammar_scenario.py,sha256=Hz59gp5ivH3tIP5UAcHZbnk8pBX6GhIABSQlG33gIRI,1502
373
+ helm/benchmark/scenarios/gsm_scenario.py,sha256=QIj0QK5ncF31ES0GUlxbdBk6SIiJJnj5wzamj0do0tQ,2674
374
+ helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py,sha256=8_ShEuOoEGu7iRE2b0tgi-cfBrCPF9k1L-Pgb__n3Bg,2005
375
+ helm/benchmark/scenarios/harm_bench_scenario.py,sha256=CBo_AfbtHTlvJdsiquP0EDTKApVmDZc7EW0VTENNAfQ,2478
376
+ helm/benchmark/scenarios/headqa_scenario.py,sha256=TufgA1tjcEyq8vQ6Wk1oYxYXhSm0pjxvG14lL3y8GAI,5417
377
+ helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py,sha256=iv1khpdiWW0Z7lshyWOhhjRfYFdAU6etN8X5EDEQCrc,1302
378
+ helm/benchmark/scenarios/ice_scenario.py,sha256=NCbeqvpDFIIG7kSCrJrS-Z9S3iG2THZ7HpAqghpi_y4,16725
379
+ helm/benchmark/scenarios/ice_scenario_pinned_file_order.py,sha256=fuirubIdi-rkJMfSd7YoDdBX2q0f5K7GGTN4XVapAUY,1613
380
+ helm/benchmark/scenarios/ifeval_scenario.py,sha256=SYn9itpFG0tlWSayf6v0P8bRgdtc-BmOV1dF-4TEm-0,1675
381
+ helm/benchmark/scenarios/imdb_ptbr_scenario.py,sha256=laq9UwyvBvZZuo54rf-8SdKTLrMdDHTdGWJ4TdC8Eng,2340
382
+ helm/benchmark/scenarios/imdb_scenario.py,sha256=qHXd-QIXTCBq8rWW3N5I2Rvg6Pz9v1zFhZkwc73w9io,6259
383
+ helm/benchmark/scenarios/imdb_scenario_pinned_file_order.py,sha256=fjW0Gkzg2Y3IAbtYJ3KC7MueWd9U8h0tlcBCqxYmRrM,1621
384
+ helm/benchmark/scenarios/infinite_bench_sum_scenario.py,sha256=jp5a2p_yqlCQXmhJRsqpTiKN8EGZi8Xyw3h37elb2OI,2785
385
+ helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py,sha256=F-gDO6r4GPBJTLirhF5noRaV0edvoIT7tiIDlovBFfE,2253
386
+ helm/benchmark/scenarios/koala_scenario.py,sha256=A5M6SD7Jjg7r9QlbHCtMaydBe-wpOtB6oc6gFXuZ47o,1389
387
+ helm/benchmark/scenarios/legal_contract_summarization_scenario.py,sha256=xjw3iKRf8P50Wo58n7ssnFiWHR2QFehzHlZhh9P1XKs,5374
388
+ helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py,sha256=q_iezJo23_HNNoIXYT4cLYCbwNzLYJx6uvxgPSE5bQA,2804
389
+ helm/benchmark/scenarios/legal_summarization_scenario.py,sha256=BFK524H7uLfz_ZURuRS7KrhzRCP-WyhIcOgdcBrsldA,8709
390
+ helm/benchmark/scenarios/legal_support_scenario.py,sha256=Ty93M8yTr_VEHomJ_36htqqBDZZKWI7PHtXA4qkSJGk,3990
391
+ helm/benchmark/scenarios/legalbench_scenario.py,sha256=Yfyouxb4ir16zlBea77Xzjc0BlcYPkiXoBoVI38FXwM,4779
392
+ helm/benchmark/scenarios/lex_glue_scenario.py,sha256=-3fsSjTXjgRN96Hl4GzDIMB_dlxSR9NR0ATUb-CiU3w,10357
393
+ helm/benchmark/scenarios/lextreme_scenario.py,sha256=gVTHoMYX6Q_Itt5rOVO5lYmqWfAtuuf63CnKAF8b_ak,20461
394
+ helm/benchmark/scenarios/live_qa_scenario.py,sha256=TnWaOPOcA4U1_8JdahQOUZ9KBj0MpMf4BcK2TDBl3BE,3666
395
+ helm/benchmark/scenarios/lm_entry_scenario.py,sha256=kQTnj5gKJmDxCgynmzQOmghwNySpna7aTY7K7RPD2x4,9109
396
+ helm/benchmark/scenarios/lsat_qa_scenario.py,sha256=2VUJ36vHUZp6fZuLfRIuPSsU_K6Z3Im2ums06sZENqo,6153
397
+ helm/benchmark/scenarios/math_scenario.py,sha256=UtNj0UaCxt0RjM-uwD_Evm7SjKnvMlfCt6K0HQOAVC0,14377
398
+ helm/benchmark/scenarios/me_q_sum_scenario.py,sha256=7DOqQmO70BpDeJy_S4fJ5i2UcCH8tunxzjFgTIim9bQ,4062
399
+ helm/benchmark/scenarios/med_dialog_scenario.py,sha256=A-OhCSsbyrkIiyScfGXf5mWJJ9mUXhWQ1S2hHFUYxQk,7254
400
+ helm/benchmark/scenarios/med_mcqa_scenario.py,sha256=XEipvuIA-QoyZrtlm8nnaPuyZzdDaeTskAhnseD3Q68,5096
401
+ helm/benchmark/scenarios/med_paragraph_simplification_scenario.py,sha256=0Z1JrizLygjd9v_LLFMk8uZ805IWjJPvg-ZvPVhtMm4,7652
402
+ helm/benchmark/scenarios/med_qa_scenario.py,sha256=m0W-FgFi58psLglZyQy_ouMQIDP-2j3aL7uInkdVtms,4478
403
+ helm/benchmark/scenarios/medalign_scenario.py,sha256=yNaEyCGdeMMTZmPQcAyQeHFDD3mHZVIrauCC-WEuiZQ,3040
404
+ helm/benchmark/scenarios/medalign_scenario_helper.py,sha256=wFR15zRuuvtGc1MXaecndfHH-_uCLF6O_3twpqXZOLo,15292
405
+ helm/benchmark/scenarios/medbullets_scenario.py,sha256=mAQ1-jgsbd5hM78C0E5cgFs6fPt2KYErdemZBTQ12iE,6447
406
+ helm/benchmark/scenarios/medcalc_bench_scenario.py,sha256=fq1qBjGkL9EA7wQBJrg_V5fIewNgpiQVK70-qShXnaA,5374
407
+ helm/benchmark/scenarios/medec_scenario.py,sha256=uS567bCzOQx08euNU6vA01YqxxXadIcSqModkqT22LQ,4969
408
+ helm/benchmark/scenarios/medhallu_scenario.py,sha256=Ed2JesQzU41P_rv_9zgBnQCGD-EEkG-EkIBw1qEIXbI,2223
409
+ helm/benchmark/scenarios/medi_qa_scenario.py,sha256=JtsRryV88nFy0UlFaUuR2QyEdYkY2vLYMScvKC9ndTQ,3770
410
+ helm/benchmark/scenarios/medication_qa_scenario.py,sha256=YJdluvYLb2_m96JkcCOTBIQOPY2h8dfmjTbJ__7Jydk,2266
411
+ helm/benchmark/scenarios/mental_health_scenario.py,sha256=nGTXWYWfeO_t1u5va4u-S_OD70qo9IKbCHk5vLEBhT4,4518
412
+ helm/benchmark/scenarios/mimic_bhc_scenario.py,sha256=RLUV_ydURfA6kTUJQ48TtaYzIYu8TSY9vl_ahNtSa7c,3777
413
+ helm/benchmark/scenarios/mimic_rrs_scenario.py,sha256=4baXBaW_zcZU5RhQM3JsRrzHGhFvLwGiOu0irZShjps,3401
414
+ helm/benchmark/scenarios/mimiciv_billing_code_scenario.py,sha256=IECc3j1RSIZeO-JqTyxOBFCbc7ovl4XsYMdGL1k6pcw,2355
415
+ helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py,sha256=-OkPMRyB7aO6QBFwoTl6a2rpzcoHeEl84tqz7k9kpCM,2982
416
+ helm/benchmark/scenarios/mmlu_pro_scenario.py,sha256=zUY0Nb8PzwxvohS1C2Me9utRfFM-8OLr0CmUfyjiVgc,4013
417
+ helm/benchmark/scenarios/mmlu_scenario.py,sha256=_5cX2uI7CxD7K_GvO3MD8CRJLuN4EzS2o_EFvbrfjSU,3855
418
+ helm/benchmark/scenarios/msmarco_scenario.py,sha256=-l7_rIMQjMWcpTyn6dGotmNJ5XxN_Ze8dEJyv5ftWFA,34050
419
+ helm/benchmark/scenarios/mtsamples_procedures_scenario.py,sha256=ftRkhL8oLZgsDvLzI8ya5jv9xv77YcDT9TU9JZBss8o,5333
420
+ helm/benchmark/scenarios/mtsamples_replicate_scenario.py,sha256=eB6PT0wwH521r6uXvoiQEo7fZQQcKATuElHuPmyVyW0,5301
421
+ helm/benchmark/scenarios/n2c2_ct_matching_scenario.py,sha256=DAN3xeupuZEzxubf60C938Hr3WjhkzgaI1MbNwgPu8I,13194
422
+ helm/benchmark/scenarios/narrativeqa_scenario.py,sha256=MiSq0UnUllJxHFU2gO7m4vr_vmulavJxc4ruZhsAt2U,5632
423
+ helm/benchmark/scenarios/natural_qa_scenario.py,sha256=g-fP8L1lXs7zwNVQOc0ZUnbYkCyElQtLVt5fe5dtvSE,12564
424
+ helm/benchmark/scenarios/newsqa_scenario.py,sha256=G25VYaLrV_JyyoT0jpzJ6p4l5qsOydm8rlzTvSptNKQ,7284
425
+ helm/benchmark/scenarios/numeracy_scenario.py,sha256=E1WkVgqPiZwaKuskD5iVwoypbG3DKI_r3bPXPqZ_SSk,30885
426
+ helm/benchmark/scenarios/oab_exams_scenario.py,sha256=vbjUzQP0zU4ckvMbsk4lh24NddVWbUAtfWmsq1h24_w,2101
427
+ helm/benchmark/scenarios/omni_math_scenario.py,sha256=5qb2cO-Ibb3kDbwYvkzsoU_aOsoKV3ROLgZbi83OyGU,1955
428
+ helm/benchmark/scenarios/open_assistant_scenario.py,sha256=zd8T6eLOlYMZiFyKrRjc-EPwk5_KpbBedAcKDbZ-TdI,5609
429
+ helm/benchmark/scenarios/opinions_qa_scenario.py,sha256=JK39tq306tKe0RDBDLz1AfAdZwNjK_Ng-rHvu6bTRY4,7395
430
+ helm/benchmark/scenarios/pubmed_qa_scenario.py,sha256=AmgdTGffaxNOJ_xDqA1ju5jXjlvEVdx3Gz7Cp7mqsd4,7789
431
+ helm/benchmark/scenarios/quac_scenario.py,sha256=RpJpOPbvhB0jv3R91Odc20LcNyZsny9J4IF24GNEygQ,6689
432
+ helm/benchmark/scenarios/race_based_med_scenario.py,sha256=FQl99ttwk-SQdix9UpOCG1nI92JHuemLRgWjFTHGiTA,5295
433
+ helm/benchmark/scenarios/raft_scenario.py,sha256=Yk56dUMqDGXpp6SxoGWhyxa4lAIniSQfivjkoPqMuFA,4644
434
+ helm/benchmark/scenarios/real_toxicity_prompts_scenario.py,sha256=zpQthgDi-AyEgOUFO5F0qaWCctLEI5WGHBEGlPEVpqc,2424
435
+ helm/benchmark/scenarios/ruler_qa_scenario_helper.py,sha256=bonCyKO9hCSce4EQCTwYAv0pgRlBYwlXAmEHl6_ljHA,6323
436
+ helm/benchmark/scenarios/ruler_qa_scenarios.py,sha256=bhm8IINDa1asCFHTLkB0BztzPvww0Uy0Lv6_G9By77M,2876
437
+ helm/benchmark/scenarios/scenario.py,sha256=kSy7tmtFeC6-QSEsBuvlrMTA1PB6fOY9jycMld-vBVM,8592
438
+ helm/benchmark/scenarios/seahelm_scenario.py,sha256=GA46ShNGUjVdMLK0ZbN4vPuGEWFQsDPJXEGHQbs1qf8,78150
439
+ helm/benchmark/scenarios/self_instruct_scenario.py,sha256=3Kvi3pLL6eGOEezjoQoGv9c1UxKiRVlFmILKzqst4pI,2309
440
+ helm/benchmark/scenarios/shc_bmt_scenario.py,sha256=bbl3XczUrPW_mzHPtNxSC2SHRKBzgZP7RueIi8vc5y0,2362
441
+ helm/benchmark/scenarios/shc_cdi_scenario.py,sha256=AJlKAiSoUJQ4N4WeeBjqOfYNsl2teq8G8B_8pOZOW8E,2422
442
+ helm/benchmark/scenarios/shc_conf_scenario.py,sha256=AQUukzN3lT5dSQWyc6t6ZSZTEIEvOj-TC7K8BA1Q5fs,2535
443
+ helm/benchmark/scenarios/shc_ent_scenario.py,sha256=urspIw8idVLiuK__cG3IvnYnky4AZWiTWzK2nzjwAVg,2530
444
+ helm/benchmark/scenarios/shc_gip_scenario.py,sha256=ectxVRWal0LnqFlBsRGhtJgWN5RZls7tHAVDeMzcW4w,2337
445
+ helm/benchmark/scenarios/shc_ptbm_scenario.py,sha256=bAHIu7YKwUhwvGJuS5hplo7JedwLFGxyLub_ALLZo98,3077
446
+ helm/benchmark/scenarios/shc_sei_scenario.py,sha256=udGZAIQ4Fpi_bV6WsuuQyIpSY74qc8VIc1MP9yRFIRs,4213
447
+ helm/benchmark/scenarios/shc_sequoia_scenario.py,sha256=Z_7LM-RHuwWGdwFwCAgAQvIz4dfXNE0uHhLM1_9m7n8,2410
448
+ helm/benchmark/scenarios/simple_safety_tests_scenario.py,sha256=sjIHT5NZlHv_IcXr_15-pOiBUPKKwykyH-QpMfvrHAY,1247
449
+ helm/benchmark/scenarios/simple_scenarios.py,sha256=ersSzp9bFEFfpJ-SNy368AuonwswLnuyA1n7FOgkw4U,6459
450
+ helm/benchmark/scenarios/spider_scenario.py,sha256=mhiV3XWGwpnIQkaHFM_rvZlrwE7nqS12-F9t1eB8kdI,3306
451
+ helm/benchmark/scenarios/starr_patient_instructions_scenario.py,sha256=X7AY99m8Lv8bVpOg-Bzld0vyFzpvi6fOAqE3rstRfpw,3843
452
+ helm/benchmark/scenarios/summarization_scenario.py,sha256=WZnqhMQED6UBmRjHSboygdenLecOqIhvgdYVXzy6Q-I,6912
453
+ helm/benchmark/scenarios/sumosum_scenario.py,sha256=HG3wrKj5alV0a2aKb_nau8bB4oKDtTOLtdf3bx8h7sw,7695
454
+ helm/benchmark/scenarios/synthetic_efficiency_scenario.py,sha256=OaxEvT1H9VjOjBSw_yKs3dcYt33vFE_UARr-UIP9pBY,3120
455
+ helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py,sha256=pt2Aln_dX1YMSl-9hV1HJmwW90MC3fWwGsMxZg-Q-UY,16391
456
+ helm/benchmark/scenarios/synthetic_reasoning_scenario.py,sha256=7STCSHiHGIQ2aaN_PwDE5jXUJ-qcu8PaS4pC-pbOceE,8410
457
+ helm/benchmark/scenarios/test_air_bench_scenario.py,sha256=9o92CK57xxgPaA9Xt9uJPPie4Cxllzq-KbMt3G35UQ0,1320
458
+ helm/benchmark/scenarios/test_bigcodebench_scenario.py,sha256=q9FWJsxLJoFaB3PSMLjI_-YyPoZYusOsMPwn6X6NKXw,1304
459
+ helm/benchmark/scenarios/test_commonsense_scenario.py,sha256=V5Mq4cxWqU6j1U3icfIuzcnCZsZO7NTKLQgF0lEpdyc,924
460
+ helm/benchmark/scenarios/test_czech_bank_qa_scenario.py,sha256=bZNLEGu58iHmutGlSp-2uVC2931TO6Rxw7giqFh9RHY,828
461
+ helm/benchmark/scenarios/test_enem_challenge_scenario.py,sha256=XfPkYaSwdGa63ToC_BLuVKTRSldWNBlKsZYK6CFzL3w,2000
462
+ helm/benchmark/scenarios/test_ewok_scenario.py,sha256=WY2vqbHF1120ht4PER0uviKMb2jnoPM3ff4KwvwcU4I,1291
463
+ helm/benchmark/scenarios/test_financebench_scenario.py,sha256=EFZLJXXBoyjlTiMQFaQ6MiYkve1lfQDjQWjn4BjqgAQ,1184
464
+ helm/benchmark/scenarios/test_gold_commodity_news_scenario.py,sha256=RO0NcIkJuujdPVO6tDygmDxhZ5YlmIIYlhwx9LeXlQs,731
465
+ helm/benchmark/scenarios/test_gpqa_scenario.py,sha256=QQJ_-nmujZBSmhBhikRUWznFJ4jHPbGDnUVCP_17poI,1884
466
+ helm/benchmark/scenarios/test_grammar.py,sha256=sPlA36sHpThbXgnGlXyOuqHfDPe2epIafmzIeL0nkoU,1364
467
+ helm/benchmark/scenarios/test_gsm_scenario.py,sha256=I-Sl8Sg8kmFd7u0zZbwbNmeFV1mQLuOHoQ1cQDDwovs,1123
468
+ helm/benchmark/scenarios/test_ifeval_scenario.py,sha256=h3CBg13VKwyb1Xaddwg2GWOzAXz4stK5lXdQtHenAw0,1646
469
+ helm/benchmark/scenarios/test_imdb_ptbr_scenario.py,sha256=8kfCkMRUMU7N4WIrWawFDoxaLB2iTvQ-sPj4RoE2Osg,887
470
+ helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py,sha256=Jy8HDZPtYS48-bBFIStKaQtxvQv_GcAwh42wCYku0vw,1969
471
+ helm/benchmark/scenarios/test_legalbench_scenario.py,sha256=FqbgwBAhHWyTIUYSzI5FOnTDx0A3u1o2ANKa_6bfA4g,1212
472
+ helm/benchmark/scenarios/test_math_scenario.py,sha256=8Raix_ykxUENh7UREw1RhpM287oav1p59P1Dn2gXktI,829
473
+ helm/benchmark/scenarios/test_med_qa_scenario.py,sha256=Ekp6r5eYPkCxV3FCzVvLemKxlhENhelqdO0Mdhg5yFo,1515
474
+ helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py,sha256=Jp3XXq6rL62CJSmSX8rimjq0QMjxT2d92PMUF8lzdac,1118
475
+ helm/benchmark/scenarios/test_mmlu_pro_scenario.py,sha256=v7A5CK5bUm-YTQK1kmqL8OGM-qsk5Hxu1ououH_rzNY,2696
476
+ helm/benchmark/scenarios/test_mmlu_scenario.py,sha256=mxEsTydKUOt8OD1Ei82nPgUFV1Tlvu5Z6drEMToEURM,1593
477
+ helm/benchmark/scenarios/test_narrativeqa_scenario.py,sha256=Rac_OrUpd2ruT95YvSrmoVz2Jpycgq3Roiyogm_0aAc,6420
478
+ helm/benchmark/scenarios/test_oab_exams_scenario.py,sha256=6iBsG_wkG1bpWY2vS4dw0zROTJkdzGxRtNeM60WfZlI,2071
479
+ helm/benchmark/scenarios/test_omni_math_scenario.py,sha256=vpK1OxWrgRHbP8hfYtEdR49nSdJHg_xFAfOApVfQ_xQ,1275
480
+ helm/benchmark/scenarios/test_scenario.py,sha256=HexTZBKphMDJbhIYj-HRCDwltPTDqHFHdT7FjPmu8Xs,2070
481
+ helm/benchmark/scenarios/test_simple_scenarios.py,sha256=9b-gtuRnd638q_JevVlEVsHzMZSzOe8j0FrUQmMyZM4,1736
482
+ helm/benchmark/scenarios/test_tweetsentbr_scenario.py,sha256=V6ZsT405ltgC3pYXW-FVN7Z4nGH8ZLiFfh_F9OPXZjw,789
483
+ helm/benchmark/scenarios/test_wildbench_scenario.py,sha256=pmQ87MNoGAXwAmPf0eoep5qf9hk6BPP2zzgzGuKXwzs,527
484
+ helm/benchmark/scenarios/test_winogrande_afr_scenario.py,sha256=LZfE4J42BZ7OF3BvfKgMWuCHpdw4-LpWnFiKyrHGXp8,910
485
+ helm/benchmark/scenarios/thai_exam_scenario.py,sha256=YjFsom1yiu-xBZ3SGenNuczVCwQcmyoITTMavGv-QEk,6069
486
+ helm/benchmark/scenarios/the_pile_scenario.py,sha256=X3GWABiJ5cSoZzeNpgNUVAz7_A9SyM5MhgpJseKpZow,5019
487
+ helm/benchmark/scenarios/truthful_qa_scenario.py,sha256=p3KAHk3C01TW7ya_XZIuK3JMJE51uoMpOnARV2UKgJM,6096
488
+ helm/benchmark/scenarios/tweetsentbr_scenario.py,sha256=ppugbPWd_3hHesLC52QbC-wUknctr9ZX4tmHefnPf6w,2879
489
+ helm/benchmark/scenarios/twitter_aae_scenario.py,sha256=wnP-zH38J62zmbdeOLzdU-E3iclbQPApgEk4AGyhdoo,2120
490
+ helm/benchmark/scenarios/unitxt_scenario.py,sha256=uL8Gni-Uw_eIp9xKQefp4J7XtKSttjJHzJE4USyoC2U,1930
491
+ helm/benchmark/scenarios/verifiability_judgment_scenario.py,sha256=2iCJplnxdR7NNKjhsLR5o51pL55Q0bcbjjWlvrk5lw4,6067
492
+ helm/benchmark/scenarios/vicuna_scenario.py,sha256=RFLUXx4zTfVPl5nT5j_DZ9TuHzk216PQcktomXqqR50,1685
493
+ helm/benchmark/scenarios/wikifact_scenario.py,sha256=tTIHk7-xEsi-CGTobcEdbsjVrtAXTZOeWXRVj0hOeWA,5856
494
+ helm/benchmark/scenarios/wikitext_103_scenario.py,sha256=k13TxITriwqoBrMzf-JzPKr5wHaC9M2A_HyxxBaASnk,3111
495
+ helm/benchmark/scenarios/wildbench_scenario.py,sha256=Qd9b1SC9ZtY1spf1vVuuFXXVxSJ0FlmR_DP7mIvAs8I,2981
496
+ helm/benchmark/scenarios/winogrande_afr_scenario.py,sha256=3SOVyrQ8D7Wzz06uSbczDE-IN4sjKSEAJ7Po-_-O6qw,3131
497
+ helm/benchmark/scenarios/wmt_14_scenario.py,sha256=1YYjz4x2RbYfJAXBTux9X30dxYTSC-YNngCCLhEiNfI,4646
498
+ helm/benchmark/scenarios/xstest_scenario.py,sha256=ndRNB5ApW4th5iltlmT9-Nfw9eTaVZQw5AMC4HZCI-k,1309
499
+ helm/benchmark/scenarios/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
500
+ helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py,sha256=wufgORN0vPuTUnp7-VeCUoLH03C5MzSa-PMYku8D0P8,5626
501
+ helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py,sha256=8piJKvbTeKdxLTWnazp0_ydC6ESRHeb-Pj2ri-86U28,6619
502
+ helm/benchmark/scenarios/audio_language/ami_scenario.py,sha256=SH4r2YyW2kQ8r6-nSRI_F4unJC-l-lzikr2O7hMKgEM,4371
503
+ helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py,sha256=kiUngeoAVOXfuKgqo96RgK_volpJUPFziu-cYDqT8WM,2685
504
+ helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py,sha256=oLOeBGjQCa3hpzjhX2bNS6637VD9VF1KbSJri9BJ3PI,2698
505
+ helm/benchmark/scenarios/audio_language/audiocaps_scenario.py,sha256=PkVqQM1zX6ecXYk-Pz4YWlST3Hnla8NyeBHbuHvhSlY,2447
506
+ helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py,sha256=uoiB3mnkudRH_rY1qeZRgobYYZ0xDn93F1Mn6Avl24Q,6724
507
+ helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py,sha256=CbcoGPW65xXRRkrDthotDfoVn51ozANG9s3LCsjxkLA,3706
508
+ helm/benchmark/scenarios/audio_language/covost2_scenario.py,sha256=3YiaQXuLGfths2XswRw30Vf26bO9jEW_kAj5wZQSOSI,5119
509
+ helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py,sha256=OKawk6Mq6ONOxcttkk-qodeFkNet7nvP0UbeEu5EgJw,3079
510
+ helm/benchmark/scenarios/audio_language/fleurs_scenario.py,sha256=k8AFujDJYtH37Zaquy4TH8xYcxE62cvOK6DVDfp1TKA,9235
511
+ helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py,sha256=an4z2Ve7CpZwBoQEvuR7e7h0_Jbfor_Itj1FQtb4Od8,3538
512
+ helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py,sha256=Gx6ITS2hblIa_KpNbYeP6GBAZxU54DkVKLtgk_LsjG4,3996
513
+ helm/benchmark/scenarios/audio_language/librispeech_scenario.py,sha256=ogMXxnyTG05tCyJ2d4hiuiVsbQvf4TbndksYeaJXl1s,3475
514
+ helm/benchmark/scenarios/audio_language/meld_audio_scenario.py,sha256=j1JFX0jGfcqX0QZBKSjYjDWo1jHJbW5Q9jHyOs6Kgls,4903
515
+ helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py,sha256=Jo_-3zC226iKGT-ac0JNMhlEccazMMiHbomx_qU0rxg,3098
516
+ helm/benchmark/scenarios/audio_language/mustard_scenario.py,sha256=9bpcvFtWq5Pd9i9X8iaY9jod3YcRqk88xnXfjwcNMoY,6130
517
+ helm/benchmark/scenarios/audio_language/mutox_scenario.py,sha256=bDCQbhsRDR6iQGNlCu_35kjmjGjuzjOIoraSncfOlOY,10277
518
+ helm/benchmark/scenarios/audio_language/parade_scenario.py,sha256=UuOa5cSrHh5n3VF_SuJp4cy1MxlI3uEKHLrNEhGuyuw,4186
519
+ helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py,sha256=oN4vBkElVzjccaEK2JFqoXMCGFTTHD0gcYwSDhvHTpQ,5438
520
+ helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py,sha256=Ar7IgtfZXFpsHJ76QacEB1KKwXVrOBE0BcSBO_GN2T4,2718
521
+ helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py,sha256=4M_gTWs4CoJ1Ce9dDFBTAe9dzSovpsve_sN1eco2V2A,3155
522
+ helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py,sha256=oaUeTgmr2AkSvEJYua4SItCbXsiK6cSSrIjlqsSQC7g,4431
523
+ helm/benchmark/scenarios/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
524
+ helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py,sha256=c8zcoGCOFqBGE4TAEx1uLsUmGXw_jIS8alI99ubGeDA,5477
525
+ helm/benchmark/scenarios/image_generation/cub200_scenario.py,sha256=7p3G4mJRc8QHR4Mw2GLsfAFuJcEe6OeZbezVhbyc55E,4103
526
+ helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py,sha256=yiaX_2Aut3hZdfggCsTCcTPOdraaX-cOjPHyE5D1lYg,9045
527
+ helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py,sha256=yAFh8Kn84b3RpHZVsVMYnk4GvbMgxB7oIPUzv5sp78I,3216
528
+ helm/benchmark/scenarios/image_generation/detection_scenario.py,sha256=E4tqQXmPSLutPYdk9Ngil7LteUwRe7p55MXSURxk_rU,3188
529
+ helm/benchmark/scenarios/image_generation/draw_bench_scenario.py,sha256=b4WcbttcjoIY5gcOMk6e7c_mqfjlQsfowo9-D_-wccY,3179
530
+ helm/benchmark/scenarios/image_generation/i2p_scenario.py,sha256=8VXVyWZmW6k4Q43GBDwqzYDj1pCKuAraGKaulZ_0MPk,2233
531
+ helm/benchmark/scenarios/image_generation/landing_page_scenario.py,sha256=0R1sAuS7txLxpaJOuvojg6dZEcRFGo4WabjX2ieo0FA,1361
532
+ helm/benchmark/scenarios/image_generation/logos_scenario.py,sha256=Sbc_vlMdHAr6AhJ6rr4TjBIMEbzcw_hdGhYGufHSlJc,10245
533
+ helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py,sha256=swcdlPAYYwtidMUIHWCW1-xxHH2QF3wm3IKDBkr6-DU,3599
534
+ helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py,sha256=kybgeSjY0nm3Wya2vQeoBtJGtHZra4XhT4VY7Izjxrk,1879
535
+ helm/benchmark/scenarios/image_generation/mscoco_scenario.py,sha256=KMzu0II1F8ajL7NrYBWe74UXKcLugg7bhubpSZR4TQw,4201
536
+ helm/benchmark/scenarios/image_generation/paint_skills_scenario.py,sha256=6nm_uo6rmkyBzbU0BntvLq9jhWlfunBEy9Vc1d7_RVU,2967
537
+ helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py,sha256=pTSd1XidvWwTPFpVinl28s6150a2w3iK4d1Ce81lijA,5165
538
+ helm/benchmark/scenarios/image_generation/radiology_scenario.py,sha256=7JN8OYap8kA36Od1_bZTkhZd-H9Qjw_dh4TIzr9UTms,1701
539
+ helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py,sha256=DoabanZhd-2MHFDZeR9EoPit0T2TvbVwZGUR0RfJyW0,2362
540
+ helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py,sha256=IB4_GbzQjjXBp-551XZ6PTNUCRX1jLcGfB3bVFI5lo4,3547
541
+ helm/benchmark/scenarios/image_generation/winoground_scenario.py,sha256=E2xPQNQzylDSmqLjjMkQB8D7A6g7bzqtSF4bXPgfVbI,2889
542
+ helm/benchmark/scenarios/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
543
+ helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py,sha256=s-sdEFVx2BgqDFTzuQCCQr4oXaYHUUeQpFgblcCU97I,3052
544
+ helm/benchmark/scenarios/vision_language/bingo_scenario.py,sha256=6YlGGGZW04Oy5A1-UG8JrN6jwR5eBuzrQ5qAise88o4,4108
545
+ helm/benchmark/scenarios/vision_language/blink_scenario.py,sha256=4UuUP704OYiR7RKw6p3eYjAYOVlSk5KtMJ5RuyEmYIg,5644
546
+ helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py,sha256=lfRHjhhXCo0YeDQe4_gfSHCzVKtqQVZ6DALLABcCmtI,4637
547
+ helm/benchmark/scenarios/vision_language/exams_v_scenario.py,sha256=pLD--gtL5q7jLSWQ8iwAdsiOrTJ_rBsLbwWMWKRhPbs,3853
548
+ helm/benchmark/scenarios/vision_language/fair_face_scenario.py,sha256=V6_1Kl2nWDRyHvwnKcSxkP0DChzKDBW0i_-t9oAxps0,4721
549
+ helm/benchmark/scenarios/vision_language/flickr30k_scenario.py,sha256=CDutFh1PHLyeMdJ9HojzYKE1zJidL9ktcsfn9uHNLZY,2612
550
+ helm/benchmark/scenarios/vision_language/gqa_scenario.py,sha256=k4E6JAN8a_KT1jjV2Ch3K5YhWKJ0f-9iCXLO-_2Xl8M,3535
551
+ helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py,sha256=qiLLdiSzhnSyjmqCAvMxjhcZ7yBiX37L1cdsZvHL4ds,3845
552
+ helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py,sha256=7GK_jAOfCgRIGiN_GInDePwuT2wZqmWHp1rqdx18xQg,4994
553
+ helm/benchmark/scenarios/vision_language/math_vista_scenario.py,sha256=HnzA0L1Mm9rw9uyK-hnCGrxo33z_U_86TLnlELjDV6E,4738
554
+ helm/benchmark/scenarios/vision_language/mementos_scenario.py,sha256=7ZHpRD7TdQQ-Mp5XQV5yyiLUE0k1KpgbLSYKLBJMxs0,4343
555
+ helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py,sha256=cM7eTE4bpcIzLyEDye86Ud3rD4Id-0ju73EXjg0DYoI,4340
556
+ helm/benchmark/scenarios/vision_language/mm_star_scenario.py,sha256=cN17oBJmLHcQUPO5GpDPLc1pM13bSNqmVoLIK281zR0,3624
557
+ helm/benchmark/scenarios/vision_language/mme_scenario.py,sha256=7Aa3y0TWGZH3QrPDiqIMkj83LU2Klrzgcb46jv5uytY,5498
558
+ helm/benchmark/scenarios/vision_language/mmmu_scenario.py,sha256=deDMdg2-ORZPV623ngncDPlRn6z6cq_QbQtMu-z0Ydo,7665
559
+ helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py,sha256=HUO09uM2rBXOfCsxzwovmwtihq53xjuzDOtQO_S3J4I,4161
560
+ helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py,sha256=c7YfclYMDtygsLnEfA8oP6Vl7evdrqqTZazmuD9Oy-8,5353
561
+ helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py,sha256=HuizbYsN5Nlihfzu4bfGuC8KSBbeIc6TVknMS4kpVJY,7149
562
+ helm/benchmark/scenarios/vision_language/originality_scenario.py,sha256=1inr-klQEz08CM2GWqbYdy-AuXQmMhOAywAlA0lJHik,1029
563
+ helm/benchmark/scenarios/vision_language/pairs_scenario.py,sha256=D3nNu3uU87eMDiMZZafuRTntXjwbqPaSDygUgQm45F8,9943
564
+ helm/benchmark/scenarios/vision_language/pope_scenario.py,sha256=gWrBG5U8uoU92JPGNm5kuzo1GekoJo1rKQaNhv6MYGA,3996
565
+ helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py,sha256=OJtiGhSN_KYgEz0VGXjCjQik_Xihtgiali70Z00XOzk,2083
566
+ helm/benchmark/scenarios/vision_language/seed_bench_scenario.py,sha256=YNwuIMJBo7wwftx-T5tCYmGo2oy_794fZ330lkDyqb0,5171
567
+ helm/benchmark/scenarios/vision_language/unicorn_scenario.py,sha256=DxGZ7EL22SzxpAkuiA5twuGVTm96wG_RBg3dU3Vh_c4,4241
568
+ helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py,sha256=wRa_OuOdyf-qcy9hml-Kj6YtVP5MDzeTbGcqva6LqdA,3707
569
+ helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py,sha256=zCnkiSya-PHc3ywAhmw03bFdsvLCxAUwGfE6OviEXDQ,4153
570
+ helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py,sha256=7bFu6CYU9bNNuFAlNjdmsmuNlDp-YkLWD1EJuoZuNAc,2597
571
+ helm/benchmark/scenarios/vision_language/vqa_scenario.py,sha256=cC8_Vyqw2f4K4hJY-eo9ptj6ANfWgiFAK7b6OOTIPLI,5239
572
+ helm/benchmark/scenarios/vision_language/image2struct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
573
+ helm/benchmark/scenarios/vision_language/image2struct/chart2csv_scenario.py,sha256=qcs3o9dPsXoeaP0bu9UVZ6P0GPEcRLoaqABxysLN6VY,1802
574
+ helm/benchmark/scenarios/vision_language/image2struct/image2struct_scenario.py,sha256=uDYN10CuXWXvgZ2BYNxlTmBsdfPNlK9G9e_VMGDKvA4,9400
575
+ helm/benchmark/scenarios/vision_language/image2struct/latex_scenario.py,sha256=SnZuHATg5i764MAdgaGwjIGdjCZNrOqP83Y5jE_fkHs,1153
576
+ helm/benchmark/scenarios/vision_language/image2struct/musicsheet_scenario.py,sha256=c08cquz2IALY7PlpOoEfAjupKZmn5GDVZ1H8Gbj4r8s,831
577
+ helm/benchmark/scenarios/vision_language/image2struct/utils_latex.py,sha256=jW3_c63a6u39PJGJw6lM9pIa3dnF8CQgZlPNZdH0sfs,15001
578
+ helm/benchmark/scenarios/vision_language/image2struct/webpage_scenario.py,sha256=DJQIa8NaKV-nhkXEBuY97MJ8a1O3x-Yr6hACVa-67Ns,11117
579
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
580
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/driver.py,sha256=WBFbb3N_eHIa7OFvHQS3Pmwbmkl6r9VyobxlIEKhty8,2823
581
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/jekyll_server.py,sha256=9WntahzuhVv54IH1m7_z0IxwLma3dbaMOne_pUx751Y,7652
582
+ helm/benchmark/scenarios/vision_language/image2struct/webpage/utils.py,sha256=UYe3PnxCKBYEbZTTEzdIoTY9gW7ZZAWmVISRIdItD-A,940
583
+ helm/benchmark/static/contamination.yaml,sha256=rAfh1DqwyUcDtyzHPQ2QiUK5eY7QfuuRtBXpZMn4TeA,3171
584
+ helm/benchmark/static/schema_air_bench.yaml,sha256=LapSMj3Ecl1Gp9XIwVCYfrerqS93GNErvp6oDnBCtgw,142378
585
+ helm/benchmark/static/schema_audio.yaml,sha256=RD8XMxFlHJ3oM578SGZln_DTjOh72EQqQz_lexSTJHE,28952
586
+ helm/benchmark/static/schema_autobencher.yaml,sha256=yb-NkF5w5R2YOg7RIsadNHJ_5G7lG1gbcDVq_25luEk,5716
587
+ helm/benchmark/static/schema_call_center.yaml,sha256=i30aFzWqdOJRyAHN8vAzyHEX1v95DEK0TI1SMKTN4TE,9106
588
+ helm/benchmark/static/schema_capabilities.yaml,sha256=HHy0aafhOaqL0C4TZw2mMt1Dce2_wuN062ORNZIbwYg,8733
589
+ helm/benchmark/static/schema_classic.yaml,sha256=sK3yVQCrk3Tn3Kmg9WITBmJZI7AKVjmIY0f3zgH_t0c,104611
590
+ helm/benchmark/static/schema_cleva.yaml,sha256=TDh-zcCzzTTs7bu0IWlY5dXYaTFhxly8sJIBGQdBvug,25401
591
+ helm/benchmark/static/schema_czech_bank.yaml,sha256=jkTRQVmmbKkbB0zPH9AtYh6Lt33ymMInRBQnHE5lIOo,5462
592
+ helm/benchmark/static/schema_decodingtrust.yaml,sha256=2VPxzcyKYea7mx-qmswyVRjPfVatjVH4Rs3OU82mgII,15670
593
+ helm/benchmark/static/schema_enem_challenge.yaml,sha256=ZDcOfonL0z-ehsW5OkwaQOeiG1jLPk_toN8s2jhVIdM,5540
594
+ helm/benchmark/static/schema_enterprise.yaml,sha256=TRYP0uNKi_Ln7kKIRYMqbOnGBlf7hF7aiE4dn8OVu1w,11040
595
+ helm/benchmark/static/schema_ewok.yaml,sha256=MluPnZSy22wZLFB2pR7ycBRgUSvIUsqvq4qM0Vk2ur4,12113
596
+ helm/benchmark/static/schema_finance.yaml,sha256=I5-rcZmYpfwS9jVsZM53h6Iv6Um33IhQqt-LUrc4_GU,7165
597
+ helm/benchmark/static/schema_heim.yaml,sha256=EK5F51C6vDZtbVFKqo5GDIi4tG-sfdVm3XcYpfthqNA,44396
598
+ helm/benchmark/static/schema_image2struct.yaml,sha256=cD1X99YcPI8BMAnNfDmXlM-FN0yPsYgu_MB7uu5pwHE,19894
599
+ helm/benchmark/static/schema_instruction_following.yaml,sha256=mYLpMv-iNtsmrv9ewfN9ceDOBBg8nSxOWfc6ByATmIk,6056
600
+ helm/benchmark/static/schema_legal.yaml,sha256=RpoFOuVSIowNgxlPn3UMfJC-68RFr3CGDciUGLPfVqc,28806
601
+ helm/benchmark/static/schema_lite.yaml,sha256=rFSoG7zGPNOtKkJyGgOViWf5WJbMiJMAXrgmqCAi9X4,36611
602
+ helm/benchmark/static/schema_long_context.yaml,sha256=sTz1CWqsXRnR5yAluWzJZJJmfnesV8MHG03wD07LiMo,8430
603
+ helm/benchmark/static/schema_medhelm.yaml,sha256=l31CYuop0hkPaSmwYMMq4DxSCyeB6LRnEAPQZAQF2gE,42013
604
+ helm/benchmark/static/schema_mmlu.yaml,sha256=KI3XnzEwBRpzfYGjP77yKL-hBklEg72D3vL0kVl1BeI,49666
605
+ helm/benchmark/static/schema_mmlu_winogrande_afr.yaml,sha256=YIVYf-mOFPq82UVBdMhnCWNOr4sV8Oi3-ozOszJ2tWQ,40143
606
+ helm/benchmark/static/schema_safety.yaml,sha256=7RfZDX4wr8Xr1BJ149ZwmplPzPkNL0-BKbEZuzUsl_0,9278
607
+ helm/benchmark/static/schema_seahelm.yaml,sha256=9XF9Rlr7I-g-uW6R0LNh7Xg52Xs3_058QybXEiN-hnM,28296
608
+ helm/benchmark/static/schema_social_audio.yaml,sha256=Nj3ORXDT4RHD52cyo1RHfueWwbhqp1qW06TaVJ2lUfE,8653
609
+ helm/benchmark/static/schema_sql.yaml,sha256=8rRff6p_i1CsH7oDbUjau2qRWbLGspuM1Hy-g5pOQiU,6047
610
+ helm/benchmark/static/schema_thai.yaml,sha256=yJUrevvgTJ46TpyXfNecW_B9urh7LPwSbBi_mT4ZngA,8348
611
+ helm/benchmark/static/schema_torr.yaml,sha256=9R6HgT9ZuCnbMdhYB-pFect9apwEVuLEr3R1fx-Txd0,14583
612
+ helm/benchmark/static/schema_tweetsentbr.yaml,sha256=DwHE5Y2STJPDT0fFNm-GPFXq_n3DStQ1ubzhSu4xsoI,5453
613
+ helm/benchmark/static/schema_unitxt.yaml,sha256=9FQhoueYNNYQ2xMuJ2KHzpg_9-_ZhZ9efk6jtTQ3tlc,11855
614
+ helm/benchmark/static/schema_vhelm.yaml,sha256=_Yr04KPL8T2ZqOcQiXnUDOqxcuMn1bjZGCeOFSjbbEM,33974
615
+ helm/benchmark/static/schema_vhelm_lite.yaml,sha256=4I68Em9q5wW8sFzj5GCJz8m49fBEuMyVmSZM0-wbfOk,4024
616
+ helm/benchmark/static_build/config.js,sha256=o98g6QSly1NAfqhYWbU4lEoZB4LEpIrePZtmimiuoXc,165
617
+ helm/benchmark/static_build/index.html,sha256=_t225NmMVglYdTTKPzwQ7Ab-cq_4g4oJgYbfkk3F2Dg,1149
618
+ helm/benchmark/static_build/assets/air-overview-d2e6c49f.png,sha256=0ubEn4J0T51-jx7IlwjaEGSrofZWlW_e67MJw47Ujzg,733055
619
+ helm/benchmark/static_build/assets/crfm-logo-74391ab8.png,sha256=dDkauL_wJR_Luu7L7pltphS3a9HSLjDkpVLa6C9vcA4,62712
620
+ helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png,sha256=Pl46pKbC_TU3L6kZQ_3G-0wTseluAhIYwb3EqpdQAjQ,1344452
621
+ helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png,sha256=LtVAC4OgcWgMAob53rTrf7cRDu-O0z85ZOGGj9wR9hw,86133
622
+ helm/benchmark/static_build/assets/helm-safety-2907a7b6.png,sha256=KQentq_1e3uGwiWMViAPxHu2XZ60gqFgovP3UWTyMmw,72312
623
+ helm/benchmark/static_build/assets/helmhero-28e90f4d.png,sha256=KOkPTf-q28PdvGOBp1G5O4q1eWUJjuij3z2h_SUUf8s,55314
624
+ helm/benchmark/static_build/assets/index-262903c1.js,sha256=mnPpe8lHsSWqq9GivvCxMwPVlZusXFP3AQChU3-bDAs,95853
625
+ helm/benchmark/static_build/assets/index-42060d71.css,sha256=QgYNcW4kJWHl4GN2T1ep6DTI9tgFbZoj3MXmwTg3sfM,489884
626
+ helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png,sha256=Pd_NZfAf1ZeU2BIGx9zNT6WmypZNP2bk5z5AxDkbwoU,270625
627
+ helm/benchmark/static_build/assets/overview-74aea3d8.png,sha256=dK6j2Nn3j9O-FMUIVRT5HGBpR_GL78vrKi8oHdG1eaI,74685
628
+ helm/benchmark/static_build/assets/process-flow-bd2eba96.png,sha256=vS66lq700aPEKTJR7maMrmepAyBZySaL42tBNCRjFWA,190822
629
+ helm/benchmark/static_build/assets/react-f82877fd.js,sha256=ijg4n6eANaZKXPWIVTQITqrtf-zzicjslJMm6DniDkA,275149
630
+ helm/benchmark/static_build/assets/recharts-4037aff0.js,sha256=SP08CFvsw8cMMMMdqcXvsLviuOxkAhXGwvUIMvYUdxk,432466
631
+ helm/benchmark/static_build/assets/tremor-9cefc3c5.js,sha256=5iR--BuAQHnEFO_jWnh-3hG34ezpt9LRJkTZNHc__pM,293015
632
+ helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png,sha256=FDfWcwGcJhJco4qmZli_ROomLiASrrnsX-wtKSDvMkc,542231
633
+ helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png,sha256=oco_P6kwqp0cC3YaT_2H2RhJ6p1sh3sEQq3R0RA_cT0,71934
634
+ helm/benchmark/static_build/assets/vhelm-model-8afb7616.png,sha256=ivt2FhDk8dwnzp1MAle5WfbXzht_Mxg4rpy-xHRybjs,180285
635
+ helm/benchmark/window_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
636
+ helm/benchmark/window_services/default_window_service.py,sha256=HlLI3be8s-GNxDygNGrvo9exEhbrO8Vtr3w0rnSIx7M,181
637
+ helm/benchmark/window_services/encoder_decoder_window_service.py,sha256=4_WCz6NpaW-71OoUCpuYgSbRbYhV4fmB3wSg7kEZb20,2155
638
+ helm/benchmark/window_services/ice_window_service.py,sha256=snyIWVeeknf202_pzBUmvPcA7UcN_FKyIpCGpO2CmFU,1100
639
+ helm/benchmark/window_services/local_window_service.py,sha256=-6wlg8gN_dN80lptRWJQsPALCK6W80-KHA7gghs2-5M,5292
640
+ helm/benchmark/window_services/no_decoding_window_service.py,sha256=s_i_cqIuU9p0GDRIBApaOHzjH7gHrBPTJ2X5NEcN33Y,1375
641
+ helm/benchmark/window_services/test_anthropic_window_service.py,sha256=6LHPP-_FwhyWiQ1mfeEimddLehJu_rQm4WdT7Vr8Kio,4286
642
+ helm/benchmark/window_services/test_bloom_window_service.py,sha256=81xYNYmyuENOJensbdjbKIoTwZ5SKXwQif0boXsnwSw,4378
643
+ helm/benchmark/window_services/test_flan_t5_window_service.py,sha256=IhQMWBq2d39O3uNKGwbaMWJkz8585Zc-J_yqvPJfwu4,695
644
+ helm/benchmark/window_services/test_gpt2_window_service.py,sha256=RC1dP17V3BrPvHTD1xtDIzlYkX2gdFtokmCY_lfm7UM,2752
645
+ helm/benchmark/window_services/test_gpt4_window_service.py,sha256=9OqZni8aI4204QRrm0C1KprKkJuPFmmANyo1082xvyA,1163
646
+ helm/benchmark/window_services/test_gptj_window_service.py,sha256=bDMmgguDtgeWUzmIs59FczBW00VedElMoHWDSavRgm8,2485
647
+ helm/benchmark/window_services/test_gptneox_window_service.py,sha256=Bjk8h7Ddg6KPuG_1Qv5XcPZQlkzNw4sO94FLnNU_wGM,4300
648
+ helm/benchmark/window_services/test_openai_window_service.py,sha256=6TZw4AGZ6kG5BIuCtRXDStjgk-JGAhZJYX6JG3aiHCU,2425
649
+ helm/benchmark/window_services/test_opt_window_service.py,sha256=Hmh5Kt1yjI-PkhCPiabqu6eFCreCkMgcTQCv_YRvM7Y,4305
650
+ helm/benchmark/window_services/test_palmyra_window_service.py,sha256=u7xb7syXCxjvQeevWtSFPaOJy40VPk2yfvduNtFTtdw,4302
651
+ helm/benchmark/window_services/test_t0pp_window_service.py,sha256=rmoMW8YsNpD_zC-GBi6M5GugT_lT9lfn5CbwNbr7d7I,4088
652
+ helm/benchmark/window_services/test_t511b_window_service.py,sha256=zmFGL4Nwg3xQ7nRe-IEkl37wx59C33xBUS8qKHqBQeU,4091
653
+ helm/benchmark/window_services/test_ul2_window_service.py,sha256=RhIK4i9XaUfgeqTZEEXxyqaIxdyu29BRKb0pBl7orKk,4151
654
+ helm/benchmark/window_services/test_utils.py,sha256=Lej1zx3q-o5C4uhIIsAbexJjNMobY--c0wy8epXvfOk,3406
655
+ helm/benchmark/window_services/test_yalm_window_service.py,sha256=PJqw2ySLOMg_iiAzJGzj-1YOrDbxFkmP6wjiDcj1RWA,4391
656
+ helm/benchmark/window_services/tokenizer_service.py,sha256=RNznJBAxcCUMCurb7mbraZULx_ZtB0G7IxbrnUe0Urk,865
657
+ helm/benchmark/window_services/window_service.py,sha256=y6BthPY1V-ugmYfaJElm5Wfy3PSgoJLj10vHcXZZGNA,4727
658
+ helm/benchmark/window_services/window_service_factory.py,sha256=T55F0Y2jiOYxUHHZxT4YX4fFXY5gfFhn56zIwUBhc7s,3423
659
+ helm/benchmark/window_services/yalm_window_service.py,sha256=EwwCoMpr9WVLhCI7OI_7tmZHQfTUwn9FFWjbhIBFRfA,1089
660
+ helm/benchmark/window_services/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
661
+ helm/benchmark/window_services/image_generation/clip_window_service.py,sha256=2JHld8GiR_eIQyHMPSN8K2VOswmKJEPMPJLsxlLpU-Q,631
662
+ helm/benchmark/window_services/image_generation/lexica_search_window_service.py,sha256=uDCUclHvo8toxSTMztK3zG7Eb-hjueobGQaBqPqVJlk,454
663
+ helm/benchmark/window_services/image_generation/openai_dalle_window_service.py,sha256=8U2qDrUB1QJHRy5STV5FywkeVm6qfNOaeVBkMQhyMGc,453
664
+ helm/benchmark/window_services/image_generation/test_clip_window_service.py,sha256=domn2MRduHVAdruSUuGPDIGKyDrh-gFxW-fZaBYR7cg,1430
665
+ helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py,sha256=nSyKK-cQxZnase3Bw4X6DyAWZEy1OZi4stDZpKtolF4,1411
666
+ helm/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
667
+ helm/clients/ai21_client.py,sha256=RAXQufajYnxr3b_1Hl-wAZkeE_j6O8zX-vngWEits6c,8158
668
+ helm/clients/ai21_utils.py,sha256=mlg3h615kyckccGZv9rqsP4Y60O3XpwyE-UURRMrxII,471
669
+ helm/clients/aleph_alpha_client.py,sha256=BK2eQIHYMxLMsZNWld85ZCj17JAoy5lU7rHuSBa4fOM,4981
670
+ helm/clients/anthropic_client.py,sha256=s3eCwHh8mbhxLi8up1WtQWKkUsHJa-LO44prNd7XYFc,34059
671
+ helm/clients/auto_client.py,sha256=J5bCxIDZJUdV1dCv_EtbvwPzd1p2Ogtg207vpb3PhgI,11624
672
+ helm/clients/azure_openai_client.py,sha256=mZ0udOAjadp7ZyE2KEtq8XuQp45eHlX_qM_getyzbA0,2009
673
+ helm/clients/bedrock_client.py,sha256=RjkYkWCHhGFA5oB_Bry6K-WHryopkZtL5Zfh48gS34s,12145
674
+ helm/clients/bedrock_utils.py,sha256=8ZZfyOuZkgxL_naJ-wwBnH4GKv425fu3MfyakGHxeb4,3764
675
+ helm/clients/client.py,sha256=InjCQi62TWhWHmfyi-mC3fSAVztd-YDyfB3BkpacHXk,9002
676
+ helm/clients/clip_score_client.py,sha256=ct3GHZ2Zh3fGwyvQ9DyoIPT6PwDPI-nUaFkUFuc8PIE,1622
677
+ helm/clients/cohere_client.py,sha256=edQO5raoJYmYzfVREqHhNvjTcqPevG0M8EPMLOANqXY,10975
678
+ helm/clients/cohere_utils.py,sha256=aYmj60m0e9RF9BIdxp1vmA-uZv17TEALw0dbgTUSpCc,504
679
+ helm/clients/gcs_client.py,sha256=1sK5x5uWtThgz9gqBLaA8oyiXGD_9nn1WyfMzJRyPQ8,3231
680
+ helm/clients/google_client.py,sha256=mIaUzK7GHCa9pqK1BEVhdt6dZsJfHv1Qdsf3I0Ayq8A,2912
681
+ helm/clients/google_translate_client.py,sha256=TgiQEscjOae58Ptgp9f4n0LXUtl1Jf6v9BI-Z1_wcuw,1304
682
+ helm/clients/http_model_client.py,sha256=_F3_y2UWqbzESQdzV0FMEsECIKjporVSAW6iUQhJ35c,2818
683
+ helm/clients/huggingface_client.py,sha256=adnFKZni9DiFDDVDkpQjWXf4HLyYLvpzy3aB9PD3HyY,15428
684
+ helm/clients/ibm_client.py,sha256=4W4fbjnDNjXrP4gVwSfBHPus0QcqFOQzFvfaST1BE1Y,9701
685
+ helm/clients/lit_gpt_client.py,sha256=pgLfSvusNpdj8F5DVxzQdHxTDRNX4RVt6unegao803U,6229
686
+ helm/clients/lit_gpt_generate.py,sha256=8DdBE9ReQ00NbV3KMFYc--PlO9X-HMOR0Rhm5CADWEA,3103
687
+ helm/clients/megatron_client.py,sha256=KFL1BBBDqxr5mtd5iu0dA6uK8_v6d4g_D6RsZrHx3a0,4107
688
+ helm/clients/mistral_client.py,sha256=ceM8KLAcniAqK1BNVdUGzqy4av2SEEau6PVmPivxc0o,8369
689
+ helm/clients/moderation_api_client.py,sha256=I5pYWRb2MmcLDYrScnC3P5N7OUFzQiVQ828_hf7zjM4,4719
690
+ helm/clients/nvidia_nim_client.py,sha256=Z1UAqR2jHacIO_QGqQl1JUZ_82JiSPstBOtj6xURmQk,902
691
+ helm/clients/open_lm_client.py,sha256=qFgYqlV_3UiW8WJKz66lLqRqg2jt1qtJ1bHMRAtBn40,1749
692
+ helm/clients/openai_client.py,sha256=BImOqF2fVdxJrgI74KCsXeUffABFc_pZ5jgSK5NdZ-o,25936
693
+ helm/clients/palmyra_client.py,sha256=U8iD3IENbA4iEpFLNKc8O2UFNYxffgt1QrBvSzctsWs,7151
694
+ helm/clients/perspective_api_client.py,sha256=o_1FFTCrTny6AZ4EJTstX1H9t8SQSQ8dvhi321RTcL4,6105
695
+ helm/clients/reka_client.py,sha256=8PW-NFsqohRQMR-JNWn9xhlG0YfghO_X-QQAnSt9Vqc,8341
696
+ helm/clients/simple_client.py,sha256=55S_y1eWD1bjktcG21Vs8G5bF6QbKKwmJyqs6lCUJeI,2048
697
+ helm/clients/stanfordhealthcare_azure_openai_client.py,sha256=NGbeI6sMenmgqPQTWxYF3C1Aen29LybRcHcsmS3Jqmg,2059
698
+ helm/clients/stanfordhealthcare_claude_client.py,sha256=ShhbLttPDRa-Pnvr35_2WmVx5s0XpsJMGzu5qhzLoLI,1020
699
+ helm/clients/stanfordhealthcare_google_client.py,sha256=cJK_uH-YBQpBJsltNuiUi0x77bh0eCM5UNBaJQ1zai4,1475
700
+ helm/clients/stanfordhealthcare_http_model_client.py,sha256=LEq1fIxHnTnwCsvkF1AUlw6L4Gwv4egx0-PUvNiMNTo,3047
701
+ helm/clients/stanfordhealthcare_openai_client.py,sha256=Qyl8voGz1hJPqT6g4PunMuN99EYaW8U-NXQQSgJbiiM,2169
702
+ helm/clients/stanfordhealthcare_shc_openai_client.py,sha256=V7K4KZaSjIiE0FkoY4qy6ifJ8pUiNa3vBcWiDsIwXFI,1343
703
+ helm/clients/test_auto_client.py,sha256=bc-rsMJ8JM0MFnQ4B48hBJ1jL3RtRyVvmPwOgzF2mF8,3155
704
+ helm/clients/test_client.py,sha256=T27UsIPWsbE1JK_8DN_DW9LkEcIGRbgDjio14YOIAb0,3854
705
+ helm/clients/test_huggingface_client.py,sha256=x2NjMuIrinfUy0wQ1S6F5cYZVr09YfvN6LfhWmyGNAM,3388
706
+ helm/clients/test_simple_client.py,sha256=G0JRQX69ypQN2VxhlNQXs5u2Tdtkcl_aeHqudDUVKi4,702
707
+ helm/clients/test_together_client.py,sha256=kyBLu-2i4EJyuJm5ft0yg8W-H1IqmULRXggEbChuxdo,6178
708
+ helm/clients/together_client.py,sha256=xA_a0R0adb9vNkMfrXOIwwdpGoIPa4Nso2tXT_2YSVg,23215
709
+ helm/clients/toxicity_classifier_client.py,sha256=AI_FizxMurubTIyeceRdkixSnhWQbcD-oEEONj5ve7o,464
710
+ helm/clients/upstage_client.py,sha256=iSL1G8G3jWSbrpacz4I0l6Lwc5T01fsLR-wZzF39ftM,679
711
+ helm/clients/vertexai_client.py,sha256=haOImGAaYwCyxr4__feG6nHUeHRbCV6ExE6Kp9aKtWs,22665
712
+ helm/clients/vllm_client.py,sha256=YLIxGoQ_ZXejA4nfVpmFE4tmHROEFxEbFsV8Ba25Eac,1658
713
+ helm/clients/yi_client.py,sha256=nC60d2HiUL2W59FTne9tWmZ9bGGY1OvI7Ob3Ng4wSPE,750
714
+ helm/clients/audio_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
715
+ helm/clients/audio_language/diva_llama_client.py,sha256=On6oNnGN_zZVkNq_kEFh4BgeNgvbNTbHCx7WRm4LYfI,4778
716
+ helm/clients/audio_language/llama_omni_client.py,sha256=OCak716q97uEk9CBXQqnmUsbLFR-dddMzg5eyIZ4gzE,8718
717
+ helm/clients/audio_language/qwen2_audiolm_client.py,sha256=cY2mScgTWr_No_MFZ8bZn5wKlNd9ae_IndShlegLtrs,8831
718
+ helm/clients/audio_language/qwen_audiolm_client.py,sha256=_SHJh-0R3wj0qWJp3HSO7nPrDtr5G_nH3CaRSofFBxg,6236
719
+ helm/clients/clip_scorers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
720
+ helm/clients/clip_scorers/base_clip_scorer.py,sha256=NfXe79g6M4Wype3Xf-oXxscaUgjbZLmy9dRnBaLiWwk,695
721
+ helm/clients/clip_scorers/clip_scorer.py,sha256=5KzYTrGuy5zA8yHX6c67Is98HLkqQooWhioPxHNLJ7s,1932
722
+ helm/clients/clip_scorers/multilingual_clip_scorer.py,sha256=LgV1hN6y2FiFQ30UakxRmlwtLs_LCMxrOCewriN1nkk,2066
723
+ helm/clients/image_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
724
+ helm/clients/image_generation/adobe_vision_client.py,sha256=eFEeuyyQlas9Oz-w7cdQ_mRIwbPJFECwmAu5xkvt3wA,2892
725
+ helm/clients/image_generation/aleph_alpha_image_generation_client.py,sha256=6HXLS_kxJouNyLVLfhoepMf5CNIQukWzxkVNV71PJ6Y,4028
726
+ helm/clients/image_generation/cogview2_client.py,sha256=ssIrM6-7bbBQq_w6SCE4DtPWnF_S2x_FqMhSjomlBK4,8455
727
+ helm/clients/image_generation/dalle2_client.py,sha256=amsgWBzP266UmOYM6QFIxEV4xAybw_OpPVdtH748REk,8430
728
+ helm/clients/image_generation/dalle3_client.py,sha256=sabS7AJ6O5ewmTkGmHr4cK14tlMcmh-xrGgj7J-xa3k,4639
729
+ helm/clients/image_generation/dalle_mini_client.py,sha256=wTXW79KJ5tCku46AiotA2x0vQAqKi6g4aX05n-Qm_pQ,8175
730
+ helm/clients/image_generation/deep_floyd_client.py,sha256=P566sdeBvLLv6vi9Y4IbtEfmoQykMDh0viUeRXY7Ik0,3028
731
+ helm/clients/image_generation/huggingface_diffusers_client.py,sha256=OgkWWNGquu0xTKVSAduAMz8T_pWG-SwwG9iZegPrXEw,12313
732
+ helm/clients/image_generation/image_generation_client_utils.py,sha256=N130PbHLLvE9Q1iVefPvTCJzs3hG3osZCeYdJyjLjCw,437
733
+ helm/clients/image_generation/lexica_client.py,sha256=zyyfxZdTiBopPLZJ-uu5ewQYf9tUhtrEIHZddq8gCBI,3681
734
+ helm/clients/image_generation/mindalle_client.py,sha256=IH9XM6n-ZHXyidTG66ew-pAgb9XaXLu-_AD_-YBydU8,4653
735
+ helm/clients/image_generation/nudity_check_client.py,sha256=TeFga6HvBKgdX7LitBoioXUD4BQGavVwzr5BFFE29x8,2599
736
+ helm/clients/image_generation/together_image_generation_client.py,sha256=onvBeRFuuuzpAVg5lZAbnUzmv5L5HwsPwRcflDPqWUI,4393
737
+ helm/clients/image_generation/cogview2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
738
+ helm/clients/image_generation/cogview2/coglm_strategy.py,sha256=P3NU3Z4jsj171PrHPtGDiCRq05kEh-KHjSTgxPDw6R8,3766
739
+ helm/clients/image_generation/cogview2/coglm_utils.py,sha256=EJPOEQJInCDVi2LHqkjEUsgw6GgVlLDrIptlT9cXk-Y,2900
740
+ helm/clients/image_generation/cogview2/sr_pipeline/__init__.py,sha256=qWuNwKlcvGwEFcw5932wk_t0_baNwUILIJzQWJjgh2A,488
741
+ helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py,sha256=1DwcUw9Tb563JpKpkPNIB5Ew1djozvPiGASShffiABk,3716
742
+ helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py,sha256=xYn3acxU4BRdDeRjk98Vj0qq8qqty93kPCLdz-bOMKs,10818
743
+ helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py,sha256=OonYjdtNKJo12cNb-t-gFHLXRFxItCXjKgS9YxWAI-k,7718
744
+ helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py,sha256=LSvAHRupsOqk3yb4GxyTsubRxrnPOEfObFym2j4eiKc,5120
745
+ helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py,sha256=5D1QWyAcY0CpwITk7EBN6ylUtc7mvZaE9iHG628AqMQ,10390
746
+ helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py,sha256=d8voNZnXfqh-LzIzAqcMD4h4XJx_u_mvu9mewMrJC5k,4392
747
+ helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py,sha256=IpDLoPBDA-ck-IP4YUqQu8NM5W7_1f3Tg-sWhU3pB6k,1557
748
+ helm/clients/image_generation/dalle_mini/__init__.py,sha256=4RmnjfGTmgYaWsQmaDkOHxgo0Wxr9qqwtpMBC_5XeGg,112
749
+ helm/clients/image_generation/dalle_mini/data.py,sha256=1unTc4lkUZ-6A2DfcbcglGtnE2KP3OuL4YWFROlsEQo,17622
750
+ helm/clients/image_generation/dalle_mini/model/__init__.py,sha256=fyMDjpuzHxWjF5Fk9Rkfyn7KpvFAwxyRCJFoA2RDPdM,428
751
+ helm/clients/image_generation/dalle_mini/model/configuration.py,sha256=8DvL27TPmbntElIy7FrOzlSqWdlkX3R2eNVxiV_QwOM,7861
752
+ helm/clients/image_generation/dalle_mini/model/modeling.py,sha256=YVbiEQSVNmN16Sg1Sn-qR9SVXYfn6UD1-eCt9QcsRwQ,69702
753
+ helm/clients/image_generation/dalle_mini/model/partitions.py,sha256=_fDpk34GL6NhNecHuP78y_gmKpWjbfw3fxMCWVEO4pc,2721
754
+ helm/clients/image_generation/dalle_mini/model/processor.py,sha256=oTx5KHXKhZjVYaS0rmtlzCIbWUTJLh0plLNUWl8xxZ8,2406
755
+ helm/clients/image_generation/dalle_mini/model/text.py,sha256=Kfba8JdO2LrSmCVlQtgc7J2kSordCgjeg7WV9V45B80,7302
756
+ helm/clients/image_generation/dalle_mini/model/tokenizer.py,sha256=SnPUzrfZXSAXXcQRCR8Ykhn5hJfUB3p5wNuriW5GWy0,245
757
+ helm/clients/image_generation/dalle_mini/model/utils.py,sha256=clu2IiIpAT0DzTc2HvmI0ySnETFsJtpi7tocPkqOreY,1171
758
+ helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py,sha256=01pV_QWUmcIpj5kBVihle_VGrJyw2AmV3QuhWASds2M,66
759
+ helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py,sha256=4q39kdTUxeW55SN8NNkA9MdFZtH6rWssN8XauuOwyi0,1213
760
+ helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py,sha256=7OKxVD7eJG7TmyPc9RdbKqz6SAXqJlZ21D-ENlLlqZE,4612
761
+ helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py,sha256=l-01MAjdbCiaaZoLycV7BcpeYwKOaN-GeZUn3mcsmhg,21067
762
+ helm/clients/image_generation/mindalle/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
763
+ helm/clients/image_generation/mindalle/models/__init__.py,sha256=1UieFJ0LGinYSB-idy3atl-gFAmS_ouiiGX6TM2Mh-I,8372
764
+ helm/clients/image_generation/mindalle/models/tokenizer.py,sha256=NFFdLUhoxEkv9SZqU3QIFk0ukaCcn6w_xFWQIRGhZJ4,1190
765
+ helm/clients/image_generation/mindalle/models/stage1/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
766
+ helm/clients/image_generation/mindalle/models/stage1/layers.py,sha256=cg5c6KShCMbMmHFuzDBIG-WgIBBkDrG9XvXC1DxqowA,11044
767
+ helm/clients/image_generation/mindalle/models/stage1/vqgan.py,sha256=KcarvKoMuPBpP0H8F8W67FogdvHaAQuo9jP3rFRxc5E,4035
768
+ helm/clients/image_generation/mindalle/models/stage2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
769
+ helm/clients/image_generation/mindalle/models/stage2/layers.py,sha256=LvDADun5nMaencaRT0pm-dq78xHpPPkpi8rlu7RLHco,5306
770
+ helm/clients/image_generation/mindalle/models/stage2/transformer.py,sha256=MjcFrbOgtwc6zL5izNAPEOwXFhasyZajwczaXFLunZg,10387
771
+ helm/clients/image_generation/mindalle/utils/__init__.py,sha256=qNxJFCN97656FlGo2UDLubtvVaArHHK3MDdWitzFPnw,199
772
+ helm/clients/image_generation/mindalle/utils/config.py,sha256=lh8dXvL7ctKmuYEbeTQZfXN-_DkHQLjGuFuvo53u5pM,3234
773
+ helm/clients/image_generation/mindalle/utils/sampling.py,sha256=soTHaJrN4FV1lDdh9HMveJs6F49UMK57Xfa0ccnHqI8,5029
774
+ helm/clients/image_generation/mindalle/utils/utils.py,sha256=ESugpzG-_73GKl07mj-8o-_nim_FOICxfYkczy3s9x4,3119
775
+ helm/clients/vision_language/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
776
+ helm/clients/vision_language/huggingface_vision2seq_client.py,sha256=hTywh5nM95BmPoDyKOSDWg9G3-QwLO3KZEJZVkmFroo,6478
777
+ helm/clients/vision_language/huggingface_vlm_client.py,sha256=H7AE8mm506PkEcUO8VaLVtptHTwVX58nZx1A_BWdKzA,4968
778
+ helm/clients/vision_language/idefics_client.py,sha256=hi1VCDBegHfBssmW0C62H3OX3U2ISVRhaSkd24gb1K4,7692
779
+ helm/clients/vision_language/open_flamingo_client.py,sha256=QH6el-wkEl4PMZM9b3_H-o2PRaMvumGbN29ee9dmkMU,6519
780
+ helm/clients/vision_language/paligemma_client.py,sha256=IU_T8r1RgpGkEAqabLKBbmoUOWV6c1a9_FXgiTy8exE,6835
781
+ helm/clients/vision_language/palmyra_vision_client.py,sha256=4elEdmwllMr2qzTzBdlRC8L5Ut3vOXFtanGGYrx4lv8,4074
782
+ helm/clients/vision_language/qwen2_vlm_client.py,sha256=XQ6SB1mkpIuYPNZMQe6jkduvwQxMfrFtVnHKv3osFGo,7310
783
+ helm/clients/vision_language/qwen_vlm_client.py,sha256=6rCH4gJMDyQHyjAE_GDIrLsInH_bvd6to-4RMWbRLeM,7407
784
+ helm/clients/vision_language/open_flamingo/__init__.py,sha256=RTxnxjYnTmTZv-608o66_W74qmKLpEO6hx0cxaZaYv8,172
785
+ helm/clients/vision_language/open_flamingo/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
786
+ helm/clients/vision_language/open_flamingo/src/factory.py,sha256=4KRXLV5mOEZ34-Foq2zVgTye3sQD-Buz6NZTSp2X9_A,5790
787
+ helm/clients/vision_language/open_flamingo/src/flamingo.py,sha256=g4ZtQX-ZBauF6UADDGiRlJdB7rlA_gb37pJzxluPXrQ,14753
788
+ helm/clients/vision_language/open_flamingo/src/flamingo_lm.py,sha256=n6eaH9OBhpjIHH822mNE8WIPi0pChNQBx4pRXhAjsPw,6317
789
+ helm/clients/vision_language/open_flamingo/src/helpers.py,sha256=pq_BgkUflYBDw8gxTO2evuiqvjw3bE9rx06iYHp0kTw,8595
790
+ helm/clients/vision_language/open_flamingo/src/utils.py,sha256=6FYU0NgshZadF3QYWQkPW8jyEFiOd6jyb8p5rv_vOj0,1444
791
+ helm/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
792
+ helm/common/audio_utils.py,sha256=iMMS0nC6EYKXpqHMewKYhi8M3J5J9A15pNxDEQzHI-E,3676
793
+ helm/common/authentication.py,sha256=RlMx29_TSrfU7ujE7dJkxmFub5EqLj2NswV5lAVFFDk,179
794
+ helm/common/cache.py,sha256=0gXq97M9JgSO5aO2puEV1WRpEy8jdc_wRsKL4rVVoY0,6725
795
+ helm/common/cache_backend_config.py,sha256=4u5A6BHNBmGnnrDNhCVgrdwhXQtyAbWcUeoo7hdgZSo,1530
796
+ helm/common/clip_score_request.py,sha256=WnNg89owDCmG7tyy8nnQL0RdKQLsUdMWiYH9XqqbGw8,840
797
+ helm/common/codec.py,sha256=gTh6AwIQ0Bbul_QSnIO7eItwMZmYtnkIrG1jkc4GOL4,7100
798
+ helm/common/concurrency.py,sha256=8THtHlCtXo5c8iCuz_UcBBdzZX6aiEALLc4u0M4SYL0,856
799
+ helm/common/credentials_utils.py,sha256=O-57nUgkWLbZF0k3lsSaVGPPHj2_OYeVuCMe0to3bRE,1118
800
+ helm/common/critique_request.py,sha256=yo4aRe-DEjudUmydthtpTj6LdhRXfZ3JZptxTkWzZ3U,3068
801
+ helm/common/file_upload_request.py,sha256=OZeAW1_zsiNdXnWDwNNvhPs0b48TUmW_e4kzzCYmyiY,543
802
+ helm/common/general.py,sha256=nMfHNPXyAAorAMmgDClD8r8XXeJcvfF0QXTP-FgH5PQ,11690
803
+ helm/common/gpu_utils.py,sha256=pmLq6ipYNLEm28VxxSNeZuVt-gAw-WnYmBvxP1P1p6M,480
804
+ helm/common/hierarchical_logger.py,sha256=EnKLnfbQftca08EJfjGEQb4tcnCKbx-JtwLnoCnhMQs,2908
805
+ helm/common/image_generation_parameters.py,sha256=nsbuk_-BlRMK6IwP5y6BnTXbTRTOcvZ6uLblL5VHLOo,916
806
+ helm/common/images_utils.py,sha256=8BsN0fd8pc0rh_TSDvippWhTfwmJJXKNF2zqKLB8cps,3372
807
+ helm/common/key_value_store.py,sha256=D9ZBORzZncf3zHQOP4AuNbQnV8cZpO_kqHY1mDRugqQ,3174
808
+ helm/common/media_object.py,sha256=1SlilnsrfZVVpfci1atin8hbREnGoNQwjBcNAH8RgBU,5151
809
+ helm/common/moderations_api_request.py,sha256=3xTsErSsCr2PHD2jpdV1JglHaYHwP2Yqu25_JFtfa68,2234
810
+ helm/common/mongo_key_value_store.py,sha256=G0TIWQcvwMjyXh4TnN6xJ462HKHUAZtQJJYQOrHK-K8,3887
811
+ helm/common/multimodal_request_utils.py,sha256=n6HgTyHNqfGmU9qmVK-wxQzrkPZ5Wdh-lO_y_ln6VYc,2184
812
+ helm/common/nudity_check_request.py,sha256=VMsujI_RBy5u_cGEk0teE4KyX1dL2Zt3Pb4U6LpBdSY,728
813
+ helm/common/object_spec.py,sha256=_usgTDQULBF6_jy7C6m-9ZNVvNxbGoTE_CdGcSvBASU,4327
814
+ helm/common/optional_dependencies.py,sha256=Qam3QCHff8tuXbS-fCw-MVe-pK18gSvHw-uQoXXxT7M,616
815
+ helm/common/perspective_api_request.py,sha256=WAVwtajNVmi5XJNsPcorGEAVrqkpPSk-Kd3b0hJghbA,2427
816
+ helm/common/reeval_parameters.py,sha256=exaEucXnSI8a076uq_qhO3CTBztMMRoRzL_7v1N4adE,300
817
+ helm/common/request.py,sha256=w6N1TmVnc6C1gzyFyhspU1nf5pOE4zBwdeGMFcsBZLk,9022
818
+ helm/common/response_format.py,sha256=wIptA8FydZoRjMvO5SFIplgDXhwpZvZmFI-Bi-7mcGU,516
819
+ helm/common/test_cache.py,sha256=j19p-qzv_98X_TMW4b39ZHwSJ-MX3p91PrkYumarS6Y,4870
820
+ helm/common/test_codec.py,sha256=igL--k-2DwAy0eoMr8D9Xs8MOjBoT0LutbMPzDlTNkM,5885
821
+ helm/common/test_general.py,sha256=c8Lh0mK8I-SfcMprq909B6zWRBxSBngq2nNL1L6-cYA,1788
822
+ helm/common/test_media_object.py,sha256=SUWLfms_vkXNivRYM0ZT8AI3_2ru6GON5l-Hb-lk-t0,1661
823
+ helm/common/tokenization_request.py,sha256=NND9ESiiDE0H8QRNpfHVjXS7MQfKKIwtVRKDIjPnnJM,3344
824
+ helm/common/file_caches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
825
+ helm/common/file_caches/file_cache.py,sha256=QfF1hlF8FQ-rcPn9Zyl6L0dOCokvYgd-dFqx4ftRuPA,359
826
+ helm/common/file_caches/local_file_cache.py,sha256=NiXbat1BBGl5P27oERqSLFfhIHpYqA1IQrvE_N1sWR8,1944
827
+ helm/common/file_caches/test_local_file_cache.py,sha256=ANb01ctUV-J4i1ab3l4uhg9Ce54U_56xq9Hayjt1WhQ,686
828
+ helm/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
829
+ helm/config/model_deployments.yaml,sha256=_zdXhMH50hqKFbtFymUFb_LmlCz3XR1-WARsDAH6ESs,135205
830
+ helm/config/model_metadata.yaml,sha256=M-23M608OVESCUai6iBLBIT_17O80pI-YWkEYRax-gk,226327
831
+ helm/config/tokenizer_configs.yaml,sha256=o7oX0jQXqKuoLC2z5YgdvJlcMcr15WtNjlqAkYLLDq8,32860
832
+ helm/proxy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
833
+ helm/proxy/accounts.py,sha256=gd5cKhKeqklf_cXCAISl65AUvZeD6afBNrs6WK3IBvQ,14764
834
+ helm/proxy/cli.py,sha256=3bG-w71CsnPgVzN53aYWebAf0avBNJCVaxxDLupEXk4,8264
835
+ helm/proxy/example_queries.py,sha256=EB2vVpAryOUAFiLrwsMiFz0zGl_UAQ8TJ9SkWngvsu4,4389
836
+ helm/proxy/query.py,sha256=eftbiUICMh8QIHVs-7cLtv_rDXKeKdRPmwjLMu0TDxQ,645
837
+ helm/proxy/retry.py,sha256=iLZmKATEJQa9jsSpOIx6YDRhmrA8G1Qm21cUxCuo2Ug,3490
838
+ helm/proxy/server.py,sha256=Q4Mzts8mketktGVJ5AoOEA-_SGCue5QeOlK8dqPUuHI,10853
839
+ helm/proxy/test_accounts.py,sha256=Vs1iOzTPN29LosDAAEs6IagQ3PccvutrJTlR1qNIcj0,1146
840
+ helm/proxy/test_retry.py,sha256=db0owyGTThmIMhYWU_Eh1U-AJvQ-Wa9j_kRmC9DNjOA,1059
841
+ helm/proxy/critique/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
842
+ helm/proxy/critique/critique_client.py,sha256=ATZuXw77lejwtpgLg3Soy3VDyv8D8xetl0o4guxDM2M,1764
843
+ helm/proxy/critique/mechanical_turk_critique_client.py,sha256=OcppmFOMweBSfVTiLIICIwjvPpHHTkdu9fFUTaubitQ,574
844
+ helm/proxy/critique/mechanical_turk_critique_exporter.py,sha256=taULrc_cIP0O9c5UpGz3l9DmWQadTVzN_v-qzTgMoyo,8470
845
+ helm/proxy/critique/mechanical_turk_critique_importer.py,sha256=NL97joO5pRkcICRdVyG4kf9JhfYRaySsxRoZ7KWDYv0,5581
846
+ helm/proxy/critique/mechanical_turk_utils.py,sha256=mKpUv4zz3s5ptzDY7UrwuI7Cr5HmNgSjPC10BnN9AL4,1766
847
+ helm/proxy/critique/model_critique_client.py,sha256=QMFiMpALXnneumKbJpXOZDEb3lPPdkIaSCasmdXHB8o,12806
848
+ helm/proxy/critique/scale_critique_client.py,sha256=B4povtceyfal95eE3N7em9cC_B5Vy4jMrHXcsXc_5m4,15889
849
+ helm/proxy/critique/surge_ai_critique_client.py,sha256=HnzgAoF4Du9Me0GS_lbNaozZslS4a2OZx735gh-coo0,8357
850
+ helm/proxy/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
851
+ helm/proxy/services/remote_service.py,sha256=zehXO0JYIR6fIgqSZ1p7icPBITYPYfjgTX1ZbxiN1dI,8806
852
+ helm/proxy/services/server_service.py,sha256=vIf0GxDRuHFmCQHpgn5wYURlBkMNfF9e0jMQitc80-w,10691
853
+ helm/proxy/services/service.py,sha256=YFG5ZlBYBz3IdSVRKDIKVlAmA-oLjFCeBHE3iIe_SU8,6020
854
+ helm/proxy/services/test_remote_service.py,sha256=xzkyptctXw3y5d1fgbidBMyw8B4rILZStC_C-hLgLUc,6643
855
+ helm/proxy/services/test_service.py,sha256=oDYen-71iwZ6YMNBVbVSdEFsH6GMvZYw5tS5Eg4YHjY,8987
856
+ helm/proxy/static/general.js,sha256=qcsntanG5UMWK2vznSVAVFy9zd3BMc8DFfNa7KKezew,3053
857
+ helm/proxy/static/help.html,sha256=2Rn_lGZspqrZhNfLQ4wIAvYO_BK9q67Q_AS2-3WsMpY,6231
858
+ helm/proxy/static/index.css,sha256=1OBOJ87LhwI2PtpoIyZoGQbSxQK2dz2vxk8BVmAybWY,717
859
+ helm/proxy/static/index.html,sha256=nUJf_hwBPokqrm_hDZsVfHcJrnhZLYhkVSoLdGOocf8,2009
860
+ helm/proxy/static/index.js,sha256=-OXgf2rUYI49vg4KhwdL2VygKgzAGoYHKngaWgMb4E0,14996
861
+ helm/proxy/static/info-icon.png,sha256=P-PW3Ek3NGiRAW5BXOjJRPBfMVqprjAqtQheGWu7zNI,3428
862
+ helm/proxy/token_counters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
863
+ helm/proxy/token_counters/auto_token_counter.py,sha256=Ag368Sb-eLQUMLW7lmWc2EOKN3kgkiCTsYnHNrsf9kw,2071
864
+ helm/proxy/token_counters/test_auto_token_counter.py,sha256=LO3H_NbVeoeaMmEuFNCmhoEWKjWVvxeW5U4yTKfE-84,8590
865
+ helm/proxy/token_counters/token_counter.py,sha256=TCij1Cp08RoFTLLLdjNPoaeDGHpA1A2hQsrRV775Kf4,425
866
+ helm/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
867
+ helm/tokenizers/ai21_tokenizer.py,sha256=CE-u39ZY5Y4XQHONpiPHKK7uvEmySYLBQi2n70OV004,2059
868
+ helm/tokenizers/aleph_alpha_tokenizer.py,sha256=Ofc5thTfW_eb5ztiU-y_0p6e2PIGbHMb2wz169sy1fc,3833
869
+ helm/tokenizers/auto_tokenizer.py,sha256=Of-T-CFOhLAjjU45T1hnrEPG_k_hzPufuDE7FRAcSN8,4251
870
+ helm/tokenizers/caching_tokenizer.py,sha256=BwcyVzG7vy3R2O0UgbNxNP2nN4wBnsvpG_9mXQuDYfw,7300
871
+ helm/tokenizers/cohere_tokenizer.py,sha256=6WwHIt7SsICmYR2QQpwDJ7pfNF8VWrFHFxF5Kynq6aY,2116
872
+ helm/tokenizers/http_model_tokenizer.py,sha256=J5Myg6JVDNgHMN7XOHwGV3WrhilUZ9Sw_FrgO4frYuY,3124
873
+ helm/tokenizers/huggingface_tokenizer.py,sha256=fpKwSnZl94AnXQybzJhVnTda5zJnGsjGphKrlPFa_Fg,8726
874
+ helm/tokenizers/lit_gpt_tokenizer.py,sha256=0c6KDeLNHPd6h27SXQvkUfmrCSLYa1kQY1GqCHVfhvw,1675
875
+ helm/tokenizers/simple_tokenizer.py,sha256=6_NROqVbygs-HRA7bYAZluN4YB5gUhVaRsYQeRTjA1E,1147
876
+ helm/tokenizers/test_ai21_tokenizer.py,sha256=V8orjdKxmEV44VYoZ9Sq5E7CIq2caNnr6vjdk0T_w1A,1646
877
+ helm/tokenizers/test_anthropic_tokenizer.py,sha256=h7sJMRv_O2yAuEzbrXLJJIo9Gy8wkTycc4gu6UFvDaw,3937
878
+ helm/tokenizers/test_cohere_tokenizer.py,sha256=15z2GJtZ-VlrliC2_Fk5DIZhQYFkJS7J73fjxYMf8YM,1431
879
+ helm/tokenizers/test_huggingface_tokenizer.py,sha256=7OB2d0PaCp-qmGXVt0V3yf0ciilN3Kd2qnAYprWRl64,6324
880
+ helm/tokenizers/test_simple_tokenizer.py,sha256=vUNdcnJqZV99-E8H1rwUH85AQPJ2HTnDr5DrZ_-zRL4,1219
881
+ helm/tokenizers/test_yalm_tokenizer.py,sha256=8IeJM3X61p3ygBfK_bJtPh_xOJ83IluaZ3UM2xTtbEY,2492
882
+ helm/tokenizers/tiktoken_tokenizer.py,sha256=u10haWtRHiSSj11MDIYIK_wpT8loQVJY2WJxIEPg0Vc,1280
883
+ helm/tokenizers/tokenizer.py,sha256=5dsxihHWA-SRSYwx2AlsLZR0L2MSMwfKRbBQy5rz_Zs,1639
884
+ helm/tokenizers/vertexai_tokenizer.py,sha256=lf-xckyeachaJI86ujNC2Cp_AVZ-BBcFgLZQy4lg7aA,4105
885
+ helm/tokenizers/yalm_tokenizer.py,sha256=u1n_zB5FMSXBU02JrCkipNLpkbmF0LA2i_53xID5d0w,1496
886
+ helm/tokenizers/yalm_tokenizer_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
887
+ helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py,sha256=1ZcPL3srfk031LmA8bEdPcIraAPnHGiYi_CqTiJSTlc,904
888
+ helm/tokenizers/yalm_tokenizer_data/voc_100b.sp,sha256=LmPD0_OIOXi8dWuNjXUYOSPhf8kPp2xhvK-g3bXcwrQ,2815034
889
+ helm/tokenizers/yalm_tokenizer_data/yalm_tokenizer.py,sha256=kH5Qig1_6r_sKbAHinX7C83tqBUoTwbe-gGZCbGVkko,6389
890
+ crfm_helm-0.5.5.dist-info/METADATA,sha256=xmTkUJOFSJ_wrES6pixgfpOjzgS4eJlWaEpGpidNEo8,22210
891
+ crfm_helm-0.5.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
892
+ crfm_helm-0.5.5.dist-info/entry_points.txt,sha256=AvH9soAH3uey9xffisWewd0yrmPWGASC036jHd1SFyg,300
893
+ crfm_helm-0.5.5.dist-info/top_level.txt,sha256=s9yl-XmuTId6n_W_xRjCS99MHTwPXOlkKxmTr8xZUNY,5
894
+ crfm_helm-0.5.5.dist-info/RECORD,,