crfm-helm: crfm_helm-0.5.4-py3-none-any.whl → crfm_helm-0.5.6-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (652):
  1. crfm_helm-0.5.6.dist-info/METADATA +427 -0
  2. crfm_helm-0.5.6.dist-info/RECORD +941 -0
  3. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +4 -4
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
  22. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  23. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  24. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  25. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  26. helm/benchmark/adaptation/prompt.py +1 -1
  27. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  28. helm/benchmark/annotation/air_bench_annotator.py +21 -6
  29. helm/benchmark/annotation/annotator.py +5 -0
  30. helm/benchmark/annotation/annotator_factory.py +3 -20
  31. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  32. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  33. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  34. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  35. helm/benchmark/annotation/chw_care_plan_annotator.py +93 -0
  36. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  37. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  38. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  39. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  40. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  41. helm/benchmark/annotation/live_qa_annotator.py +1 -1
  42. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  43. helm/benchmark/annotation/medalign_annotator.py +100 -0
  44. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  45. helm/benchmark/annotation/medication_qa_annotator.py +87 -63
  46. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  47. helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
  48. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  49. helm/benchmark/annotation/model_as_judge.py +214 -6
  50. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  51. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  52. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  53. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  54. helm/benchmark/annotation/omni_math_annotator.py +131 -0
  55. helm/benchmark/annotation/spider_annotator.py +18 -0
  56. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  57. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  58. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  59. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  60. helm/benchmark/annotation_executor.py +35 -15
  61. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  62. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  63. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  64. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  65. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  66. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  67. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  68. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  69. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  70. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  71. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  72. helm/benchmark/augmentations/perturbation.py +1 -1
  73. helm/benchmark/augmentations/space_perturbation.py +2 -2
  74. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  75. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  76. helm/benchmark/augmentations/test_perturbation.py +16 -13
  77. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  78. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  79. helm/benchmark/data_preprocessor.py +2 -2
  80. helm/benchmark/executor.py +11 -12
  81. helm/benchmark/huggingface_registration.py +2 -7
  82. helm/benchmark/metrics/aci_bench_metrics.py +14 -0
  83. helm/benchmark/metrics/basic_metrics.py +6 -6
  84. helm/benchmark/metrics/bbq_metrics.py +2 -2
  85. helm/benchmark/metrics/bias_metrics.py +12 -3
  86. helm/benchmark/metrics/bias_word_lists.py +1 -1
  87. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  88. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  89. helm/benchmark/metrics/chw_care_plan_metrics.py +14 -0
  90. helm/benchmark/metrics/classification_metrics.py +76 -12
  91. helm/benchmark/metrics/cleva_harms_metrics.py +10 -9
  92. helm/benchmark/metrics/code_metrics.py +5 -5
  93. helm/benchmark/metrics/comet_metric.py +125 -0
  94. helm/benchmark/metrics/common_metric_specs.py +9 -2
  95. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  96. helm/benchmark/metrics/copyright_metrics.py +4 -4
  97. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  98. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  99. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  100. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  101. helm/benchmark/metrics/dischargeme_metrics.py +14 -0
  102. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  103. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  104. helm/benchmark/metrics/efficiency_metrics.py +6 -6
  105. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  106. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  107. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  108. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  109. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  110. helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
  111. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  112. helm/benchmark/metrics/ifeval/__init__.py +0 -0
  113. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  114. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  115. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  116. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  117. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  118. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  120. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  121. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  122. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  123. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  124. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  125. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  126. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  127. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  128. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  129. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  130. helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
  131. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  132. helm/benchmark/metrics/llm_jury_metrics.py +46 -0
  133. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  134. helm/benchmark/metrics/med_dialog_metrics.py +14 -0
  135. helm/benchmark/metrics/medalign_metrics.py +14 -0
  136. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  137. helm/benchmark/metrics/medec_metrics.py +101 -0
  138. helm/benchmark/metrics/medi_qa_metrics.py +14 -0
  139. helm/benchmark/metrics/medication_qa_metrics.py +10 -19
  140. helm/benchmark/metrics/melt_bias_metric.py +234 -0
  141. helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
  142. helm/benchmark/metrics/melt_metric_specs.py +43 -0
  143. helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
  144. helm/benchmark/metrics/mental_health_metrics.py +14 -0
  145. helm/benchmark/metrics/metric.py +3 -3
  146. helm/benchmark/metrics/metric_service.py +11 -11
  147. helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
  148. helm/benchmark/metrics/mimic_rrs_metrics.py +14 -0
  149. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  150. helm/benchmark/metrics/mtsamples_procedures_metrics.py +14 -0
  151. helm/benchmark/metrics/mtsamples_replicate_metrics.py +14 -0
  152. helm/benchmark/metrics/nltk_helper.py +32 -0
  153. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  154. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  155. helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
  156. helm/benchmark/metrics/output_processing_metric.py +60 -0
  157. helm/benchmark/metrics/output_processors.py +15 -0
  158. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  159. helm/benchmark/metrics/ranking_metrics.py +3 -3
  160. helm/benchmark/metrics/reference_metric.py +3 -3
  161. helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
  162. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  163. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  164. helm/benchmark/metrics/spider_metrics.py +7 -0
  165. helm/benchmark/metrics/starr_patient_instructions_metrics.py +14 -0
  166. helm/benchmark/metrics/statistic.py +1 -1
  167. helm/benchmark/metrics/summac/model_summac.py +2 -3
  168. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  169. helm/benchmark/metrics/summarization_metrics.py +20 -9
  170. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  171. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  172. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  173. helm/benchmark/metrics/test_metric.py +1 -1
  174. helm/benchmark/metrics/test_statistic.py +2 -2
  175. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  176. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  177. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  178. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  179. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  180. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  181. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  182. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +3 -3
  183. helm/benchmark/metrics/toxicity_metrics.py +6 -6
  184. helm/benchmark/metrics/unitxt_metrics.py +7 -5
  185. helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
  186. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  187. helm/benchmark/metrics/vision_language/image_utils.py +2 -2
  188. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  189. helm/benchmark/model_deployment_registry.py +6 -8
  190. helm/benchmark/model_metadata_registry.py +16 -0
  191. helm/benchmark/presentation/contamination.py +3 -3
  192. helm/benchmark/presentation/create_plots.py +33 -12
  193. helm/benchmark/presentation/run_display.py +13 -0
  194. helm/benchmark/presentation/schema.py +2 -1
  195. helm/benchmark/presentation/summarize.py +97 -67
  196. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  197. helm/benchmark/reeval_run.py +202 -0
  198. helm/benchmark/reeval_runner.py +355 -0
  199. helm/benchmark/run.py +86 -90
  200. helm/benchmark/run_expander.py +90 -9
  201. helm/benchmark/run_spec_factory.py +13 -0
  202. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  203. helm/benchmark/run_specs/audio_run_specs.py +657 -0
  204. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  205. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  206. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  207. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  208. helm/benchmark/run_specs/enterprise_run_specs.py +280 -0
  209. helm/benchmark/run_specs/experimental_run_specs.py +142 -3
  210. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  211. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  212. helm/benchmark/run_specs/long_context_run_specs.py +141 -0
  213. helm/benchmark/run_specs/medhelm_run_specs.py +1260 -0
  214. helm/benchmark/run_specs/melt_run_specs.py +783 -0
  215. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  216. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  217. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  218. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
  219. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +169 -0
  220. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  221. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  222. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  223. helm/benchmark/run_specs/vlm_run_specs.py +103 -2
  224. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  225. helm/benchmark/runner.py +5 -5
  226. helm/benchmark/scenarios/aci_bench_scenario.py +126 -0
  227. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  228. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  229. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  230. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  231. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +130 -0
  232. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  233. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  234. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  235. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  236. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  237. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  238. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  239. helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
  240. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  241. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  242. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  243. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  244. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  245. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  246. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  247. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  248. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  249. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  250. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  251. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  252. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +103 -0
  253. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +110 -0
  254. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +78 -0
  255. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +109 -0
  256. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +83 -0
  257. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  258. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +105 -0
  259. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  260. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  261. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  262. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  263. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  264. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  265. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  266. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  267. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  268. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  269. helm/benchmark/scenarios/bold_scenario.py +1 -1
  270. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  271. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  272. helm/benchmark/scenarios/chw_care_plan_scenario.py +106 -0
  273. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  274. helm/benchmark/scenarios/clear_scenario.py +157 -0
  275. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  276. helm/benchmark/scenarios/code_scenario.py +17 -4
  277. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  278. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  279. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  280. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  281. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  282. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  283. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  284. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  285. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  286. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  287. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  288. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  289. helm/benchmark/scenarios/dischargeme_scenario.py +172 -0
  290. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  291. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  292. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  293. helm/benchmark/scenarios/ehr_sql_scenario.py +137 -0
  294. helm/benchmark/scenarios/ehrshot_scenario.py +1519 -0
  295. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  296. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  297. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  298. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  299. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  300. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  301. helm/benchmark/scenarios/grammar.py +2 -2
  302. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  303. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  304. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  305. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  306. helm/benchmark/scenarios/headqa_scenario.py +136 -0
  307. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  308. helm/benchmark/scenarios/ice_scenario.py +8 -4
  309. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  310. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  311. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  312. helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
  313. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +79 -0
  314. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  315. helm/benchmark/scenarios/koala_scenario.py +1 -1
  316. helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
  317. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  318. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  319. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  320. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  321. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  322. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  323. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  324. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  325. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  326. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  327. helm/benchmark/scenarios/math_scenario.py +9 -1
  328. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  329. helm/benchmark/scenarios/med_dialog_scenario.py +25 -22
  330. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  331. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  332. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  333. helm/benchmark/scenarios/medalign_scenario.py +94 -0
  334. helm/benchmark/scenarios/medalign_scenario_helper.py +432 -0
  335. helm/benchmark/scenarios/medbullets_scenario.py +145 -0
  336. helm/benchmark/scenarios/medcalc_bench_scenario.py +127 -0
  337. helm/benchmark/scenarios/medec_scenario.py +125 -0
  338. helm/benchmark/scenarios/medhallu_scenario.py +72 -0
  339. helm/benchmark/scenarios/medi_qa_scenario.py +111 -0
  340. helm/benchmark/scenarios/medication_qa_scenario.py +8 -2
  341. helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
  342. helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
  343. helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
  344. helm/benchmark/scenarios/melt_scenarios.py +793 -0
  345. helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
  346. helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
  347. helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
  348. helm/benchmark/scenarios/mental_health_scenario.py +123 -0
  349. helm/benchmark/scenarios/mimic_bhc_scenario.py +103 -0
  350. helm/benchmark/scenarios/mimic_rrs_scenario.py +98 -0
  351. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +77 -0
  352. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  353. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  354. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  355. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  356. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +144 -0
  357. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +142 -0
  358. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +277 -0
  359. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  360. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  361. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  362. helm/benchmark/scenarios/numeracy_scenario.py +12 -2
  363. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  364. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  365. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  366. helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
  367. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  368. helm/benchmark/scenarios/pubmed_qa_scenario.py +59 -43
  369. helm/benchmark/scenarios/quac_scenario.py +10 -1
  370. helm/benchmark/scenarios/race_based_med_scenario.py +152 -0
  371. helm/benchmark/scenarios/raft_scenario.py +17 -2
  372. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  373. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  374. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  375. helm/benchmark/scenarios/scenario.py +9 -1
  376. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
  377. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  378. helm/benchmark/scenarios/shc_bmt_scenario.py +75 -0
  379. helm/benchmark/scenarios/shc_cdi_scenario.py +75 -0
  380. helm/benchmark/scenarios/shc_conf_scenario.py +76 -0
  381. helm/benchmark/scenarios/shc_ent_scenario.py +77 -0
  382. helm/benchmark/scenarios/shc_gip_scenario.py +74 -0
  383. helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
  384. helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
  385. helm/benchmark/scenarios/shc_ptbm_scenario.py +81 -0
  386. helm/benchmark/scenarios/shc_sei_scenario.py +94 -0
  387. helm/benchmark/scenarios/shc_sequoia_scenario.py +77 -0
  388. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  389. helm/benchmark/scenarios/spider_scenario.py +91 -0
  390. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +97 -0
  391. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  392. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  393. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  394. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  395. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  396. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  397. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  398. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  399. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  400. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  401. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  402. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  403. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  404. helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
  405. helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
  406. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  407. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  408. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  409. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  410. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  411. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  412. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  413. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  414. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  415. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  416. helm/benchmark/scenarios/truthful_qa_scenario.py +12 -2
  417. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  418. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  419. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  420. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  421. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  422. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  423. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  424. helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
  425. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  426. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  427. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  428. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  429. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  430. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  431. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  432. helm/benchmark/server.py +13 -1
  433. helm/benchmark/slurm_runner.py +1 -1
  434. helm/benchmark/static/schema_audio.yaml +763 -0
  435. helm/benchmark/static/schema_autobencher.yaml +150 -0
  436. helm/benchmark/static/schema_call_center.yaml +97 -60
  437. helm/benchmark/static/{schema_medical.yaml → schema_capabilities.yaml} +100 -101
  438. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  439. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  440. helm/benchmark/static/schema_enterprise.yaml +319 -0
  441. helm/benchmark/static/schema_finance.yaml +14 -12
  442. helm/benchmark/static/schema_heim.yaml +1389 -0
  443. helm/benchmark/static/schema_long_context.yaml +283 -0
  444. helm/benchmark/static/schema_medhelm.yaml +1140 -0
  445. helm/benchmark/static/schema_melt.yaml +1257 -0
  446. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  447. helm/benchmark/static/schema_safety.yaml +18 -1
  448. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
  449. helm/benchmark/static/schema_slphelm.yaml +162 -0
  450. helm/benchmark/static/schema_social_audio.yaml +224 -0
  451. helm/benchmark/static/schema_sql.yaml +171 -0
  452. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
  453. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  454. helm/benchmark/static/schema_vhelm.yaml +129 -56
  455. helm/benchmark/static/schema_video.yaml +219 -0
  456. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  457. helm/benchmark/static_build/assets/index-94295e78.js +10 -0
  458. helm/benchmark/static_build/assets/index-b9779128.css +1 -0
  459. helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
  460. helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png +0 -0
  461. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  462. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  463. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-38a10867.js} +2 -2
  464. helm/benchmark/static_build/config.js +1 -1
  465. helm/benchmark/static_build/index.html +6 -6
  466. helm/benchmark/window_services/default_window_service.py +1 -1
  467. helm/benchmark/window_services/encoder_decoder_window_service.py +4 -4
  468. helm/benchmark/window_services/ice_window_service.py +1 -1
  469. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  470. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  471. helm/benchmark/window_services/local_window_service.py +2 -2
  472. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  473. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  474. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  475. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  476. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  477. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  478. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  479. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  480. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  481. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  482. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  483. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  484. helm/benchmark/window_services/test_utils.py +4 -5
  485. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  486. helm/benchmark/window_services/tokenizer_service.py +7 -8
  487. helm/benchmark/window_services/yalm_window_service.py +1 -1
  488. helm/clients/ai21_client.py +3 -3
  489. helm/clients/aleph_alpha_client.py +1 -1
  490. helm/clients/anthropic_client.py +69 -29
  491. helm/clients/audio_language/__init__.py +0 -0
  492. helm/clients/audio_language/diva_llama_client.py +120 -0
  493. helm/clients/audio_language/llama_omni_client.py +198 -0
  494. helm/clients/audio_language/qwen2_5_omni_client.py +197 -0
  495. helm/clients/audio_language/qwen2_audiolm_client.py +190 -0
  496. helm/clients/audio_language/qwen_audiolm_client.py +152 -0
  497. helm/clients/audio_language/test.py +62 -0
  498. helm/clients/auto_client.py +4 -2
  499. helm/clients/azure_openai_client.py +55 -0
  500. helm/clients/bedrock_client.py +203 -7
  501. helm/clients/bedrock_utils.py +33 -0
  502. helm/clients/client.py +7 -7
  503. helm/clients/clip_scorers/clip_scorer.py +1 -1
  504. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  505. helm/clients/cohere_client.py +3 -3
  506. helm/clients/google_client.py +1 -1
  507. helm/clients/grok_client.py +36 -0
  508. helm/clients/http_model_client.py +1 -1
  509. helm/clients/huggingface_client.py +52 -21
  510. helm/clients/huggingface_pipeline_client.py +138 -0
  511. helm/clients/ibm_client.py +267 -0
  512. helm/clients/image_generation/adobe_vision_client.py +1 -1
  513. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  514. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  515. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  516. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  517. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  518. helm/clients/image_generation/cogview2_client.py +1 -1
  519. helm/clients/image_generation/dalle2_client.py +1 -1
  520. helm/clients/image_generation/dalle3_client.py +2 -2
  521. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  522. helm/clients/image_generation/dalle_mini/data.py +1 -1
  523. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  524. helm/clients/image_generation/dalle_mini/model/configuration.py +2 -2
  525. helm/clients/image_generation/dalle_mini/model/modeling.py +3 -3
  526. helm/clients/image_generation/dalle_mini/model/processor.py +5 -5
  527. helm/clients/image_generation/dalle_mini/model/tokenizer.py +2 -2
  528. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  529. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  530. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  531. helm/clients/image_generation/dalle_mini_client.py +1 -1
  532. helm/clients/image_generation/deep_floyd_client.py +1 -1
  533. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  534. helm/clients/image_generation/lexica_client.py +1 -1
  535. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  536. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  537. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  538. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  539. helm/clients/image_generation/mindalle_client.py +1 -1
  540. helm/clients/image_generation/together_image_generation_client.py +1 -1
  541. helm/clients/lit_gpt_client.py +2 -2
  542. helm/clients/mistral_client.py +62 -18
  543. helm/clients/nvidia_nim_client.py +0 -3
  544. helm/clients/openai_client.py +308 -43
  545. helm/clients/openai_responses_client.py +174 -0
  546. helm/clients/palmyra_client.py +3 -9
  547. helm/clients/reka_client.py +3 -3
  548. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  549. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  550. helm/clients/stanfordhealthcare_google_client.py +43 -0
  551. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  552. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  553. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  554. helm/clients/test_client.py +1 -1
  555. helm/clients/test_together_client.py +6 -1
  556. helm/clients/together_client.py +76 -9
  557. helm/clients/upstage_client.py +23 -0
  558. helm/clients/vertexai_client.py +45 -13
  559. helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
  560. helm/clients/vision_language/huggingface_vlm_client.py +2 -2
  561. helm/clients/vision_language/idefics_client.py +6 -2
  562. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  563. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  564. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  565. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  566. helm/clients/vision_language/paligemma_client.py +2 -2
  567. helm/clients/vision_language/qwen2_vlm_client.py +188 -0
  568. helm/clients/vision_language/qwen_vlm_client.py +7 -5
  569. helm/clients/vllm_client.py +4 -6
  570. helm/clients/writer_client.py +102 -0
  571. helm/clients/yi_client.py +0 -3
  572. helm/common/audio_utils.py +111 -0
  573. helm/common/context.py +80 -0
  574. helm/common/credentials_utils.py +5 -5
  575. helm/common/file_caches/local_file_cache.py +1 -1
  576. helm/common/file_caches/test_local_file_cache.py +1 -1
  577. helm/common/general.py +9 -2
  578. helm/common/hierarchical_logger.py +46 -3
  579. helm/common/images_utils.py +2 -2
  580. helm/common/local_context.py +140 -0
  581. helm/common/media_object.py +2 -2
  582. helm/common/multimodal_request_utils.py +26 -0
  583. helm/common/reeval_parameters.py +12 -0
  584. helm/common/remote_context.py +61 -0
  585. helm/common/request.py +14 -2
  586. helm/common/response_format.py +18 -0
  587. helm/common/test_media_object.py +1 -1
  588. helm/config/model_deployments.yaml +1792 -28
  589. helm/config/model_metadata.yaml +1606 -51
  590. helm/config/tokenizer_configs.yaml +521 -4
  591. helm/proxy/cli.py +5 -3
  592. helm/proxy/critique/mechanical_turk_utils.py +1 -1
  593. helm/proxy/example_queries.py +1 -1
  594. helm/proxy/server.py +11 -4
  595. helm/proxy/services/remote_service.py +1 -1
  596. helm/proxy/services/server_service.py +22 -86
  597. helm/proxy/services/test_remote_service.py +2 -2
  598. helm/proxy/services/test_service.py +1 -1
  599. helm/proxy/static/general.js +122 -0
  600. helm/proxy/static/help.html +99 -0
  601. helm/proxy/static/index.css +57 -0
  602. helm/proxy/static/index.html +40 -0
  603. helm/proxy/static/index.js +456 -0
  604. helm/proxy/static/info-icon.png +0 -0
  605. helm/proxy/test_retry.py +1 -1
  606. helm/proxy/token_counters/auto_token_counter.py +1 -1
  607. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  608. helm/tokenizers/caching_tokenizer.py +2 -30
  609. helm/tokenizers/grok_tokenizer.py +53 -0
  610. helm/tokenizers/http_model_tokenizer.py +1 -1
  611. helm/tokenizers/huggingface_tokenizer.py +3 -3
  612. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  613. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  614. helm/tokenizers/test_grok_tokenizer.py +33 -0
  615. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  616. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  617. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  618. helm/tokenizers/tokenizer.py +3 -1
  619. helm/tokenizers/yalm_tokenizer.py +3 -3
  620. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  621. crfm_helm-0.5.4.dist-info/METADATA +0 -350
  622. crfm_helm-0.5.4.dist-info/RECORD +0 -697
  623. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  624. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  625. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  626. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  627. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  628. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  629. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  630. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  631. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  632. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  633. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  634. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  635. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  636. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  637. helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
  638. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  639. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  640. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  641. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  642. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  643. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  644. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  645. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  646. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  647. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  648. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  649. helm/tokenizers/anthropic_tokenizer.py +0 -52
  650. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/entry_points.txt +0 -0
  651. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info/licenses}/LICENSE +0 -0
  652. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,138 @@
1
+ from threading import Lock
2
+ from typing import Any, Dict, List, Optional, Union
3
+
4
+ import transformers
5
+
6
+ from helm.clients.client import CachingClient
7
+ from helm.common.cache import CacheConfig
8
+ from helm.common.hierarchical_logger import htrack_block, hwarn
9
+ from helm.common.request import GeneratedOutput, Request, RequestResult, wrap_request_time
10
+ from helm.proxy.retry import NonRetriableException
11
+
12
+
13
+ _pipelines: Dict[str, transformers.Pipeline] = {}
14
+ _pipelines_lock: Lock = Lock()
15
+
16
+
17
def _get_pipeline(
    helm_model_name: str,
    pipeline_kwargs: Dict[str, Any],
) -> Any:
    """Return the `transformers` pipeline cached for `helm_model_name`, creating it on first use.

    The module-level cache is keyed by the HELM model name only, so
    `pipeline_kwargs` is consulted solely when no pipeline has been stored
    for that name yet. `pipeline_kwargs["model"]` names the Hugging Face
    model and is used in the log message; the whole dict is forwarded to
    `transformers.pipeline`. The check-and-create step runs while holding
    the module-level lock.
    """
    with _pipelines_lock:
        already_loaded = helm_model_name in _pipelines
        if not already_loaded:
            huggingface_model_name = pipeline_kwargs["model"]
            loading_message = (
                f"Loading HuggingFace model {huggingface_model_name} (kwargs={pipeline_kwargs}) "
                f"for HELM model {helm_model_name} with transformers.pipeline"
            )
            with htrack_block(loading_message):
                _pipelines[helm_model_name] = transformers.pipeline(**pipeline_kwargs)

    return _pipelines[helm_model_name]
37
+
38
+
39
class HuggingFacePipelineClient(CachingClient):
    """Client that serves text-generation requests through a `transformers` pipeline.

    The pipeline is obtained from the module-level cache via `_get_pipeline`,
    keyed by the HELM model name.
    """

    def __init__(
        self,
        cache_config: CacheConfig,
        model_name: str,
        pretrained_model_name_or_path: Optional[str] = None,
        apply_chat_template: Optional[bool] = None,
        **kwargs,
    ):
        """Create the client and load (or reuse) the pipeline.

        :param cache_config: cache configuration forwarded to `CachingClient`.
        :param model_name: HELM model name; also used as the Hugging Face model
            name when `pretrained_model_name_or_path` is not given.
        :param pretrained_model_name_or_path: optional override for the model
            passed to `transformers.pipeline`.
        :param apply_chat_template: whether inputs are sent as chat messages.
            When None, it is inferred from the tokenizer's chat template.
        :param kwargs: extra keyword arguments forwarded to `transformers.pipeline`.
        """
        # Include `pretrained_model_name_or_path` parameter so that model deployments can use
        # the `pretrained_model_name_or_path` arg to override `model_name`
        super().__init__(cache_config=cache_config)
        self._helm_model_name = model_name
        self._pipeline_kwargs = {
            "model": pretrained_model_name_or_path or self._helm_model_name,
            "task": "text-generation",
            **kwargs,
        }
        self._pipeline = _get_pipeline(self._helm_model_name, self._pipeline_kwargs)
        if apply_chat_template is not None:
            self._apply_chat_template = apply_chat_template
        else:
            # If the user did not explicitly configure whether the model is a chat model with `apply_chat_template` arg,
            # auto-infer if the model is a chat model based on whether the tokenizer has a chat template.
            # Note: Auto-inference is incorrect for some non-chat models that still have chat templates
            # e.g. Qwen2, Qwen 2.5.
            # For these models, the `apply_chat_template` arg should be explicitly set to false.
            self._apply_chat_template = bool(self._pipeline.tokenizer.chat_template)
            hwarn(
                f"Automatically set `apply_chat_template` to {self._apply_chat_template} based on "
                "whether the tokenizer has a chat template. "
                "If this is incorrect, please explicitly set `apply_chat_template`."
            )

    def make_text_inputs(self, request: Request) -> Union[str, List[Dict[str, str]]]:
        """Build the `text_inputs` value for the pipeline from `request`.

        Returns a list of chat messages when the chat template is applied
        (wrapping a plain prompt in a single user message), otherwise the
        prompt string.

        :raises NonRetriableException: if both `prompt` and `messages` are set,
            or if `messages` are given while the chat template is not applied.
        """
        if request.prompt and request.messages:
            raise NonRetriableException(f"More than one of `prompt` and `messages` was set in request: {request}")
        # Chat model expects a list of messages as input
        if self._apply_chat_template:
            if request.messages:
                return request.messages
            else:
                return [{"role": "user", "content": request.prompt}]
        # Base non-chat model expects a string as input
        else:
            if request.messages:
                # Fixed typo in the message ("mesages" -> "messages").
                raise NonRetriableException("Chat messages not supported by non-chat model")
            else:
                return request.prompt

    def make_request(self, request: Request) -> RequestResult:
        """Run `request` through the pipeline (or the cache) and return the completions.

        Each completion carries only the generated text; `logprob` is set to 0
        and `tokens` is left empty.

        :raises NonRetriableException: if the request targets a different model
            than the one loaded, or if the stop sequences are anything other
            than a single sequence that tokenizes to a single token.
        """
        if request.model != self._helm_model_name:
            raise NonRetriableException(
                f"This instance of HuggingFacePipelineClient has loaded model {self._helm_model_name} but the request was for model {request.model}"  # noqa: E501
            )
        completions: List[GeneratedOutput] = []
        # Sampling is enabled only for a strictly positive temperature;
        # otherwise `temperature` and `top_k` are passed as None.
        do_sample = request.temperature > 0.0
        raw_request = {
            "text_inputs": self.make_text_inputs(request),
            "return_full_text": request.echo_prompt,
            "temperature": request.temperature if do_sample else None,
            "num_return_sequences": request.num_completions,
            "max_new_tokens": request.max_tokens,
            "top_p": request.top_p,
            "top_k": request.top_k_per_token if do_sample else None,
            "do_sample": do_sample,
            "return_dict_in_generate": True,
        }
        if request.stop_sequences:
            stop_sequence_ids = self._pipeline.tokenizer(
                request.stop_sequences, return_token_type_ids=False, add_special_tokens=False
            )
            # Only one single-token stop sequence is supported; it is passed as `eos_token_id`.
            if len(stop_sequence_ids.input_ids) == 1 and len(stop_sequence_ids.input_ids[0]) == 1:
                raw_request["eos_token_id"] = stop_sequence_ids.input_ids[0][0]
            else:
                raise NonRetriableException(
                    "Multiple stop sequences and stop sequences of multiple tokens, are not yet supported by HuggingFacePipelineClient"  # noqa: E501
                )

        def do_it() -> Dict[str, Any]:
            pipeline_outputs = self._pipeline(**raw_request)
            return {"outputs": pipeline_outputs}

        # The cache key covers both the pipeline construction kwargs and the per-request arguments.
        cache_key = CachingClient.make_cache_key(
            {"pipeline_kwargs": self._pipeline_kwargs, **raw_request},
            request,
        )

        response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
        for raw_output in response["outputs"]:
            completions.append(GeneratedOutput(text=raw_output["generated_text"], logprob=0, tokens=[]))
        return RequestResult(
            success=True,
            cached=cached,
            request_time=response["request_time"],
            request_datetime=response["request_datetime"],
            completions=completions,
            embedding=[],
        )
@@ -0,0 +1,267 @@
1
+ from abc import ABC
2
+ from abc import abstractmethod
3
+
4
+ from helm.common.hierarchical_logger import hlog
5
+ from helm.common.cache import CacheConfig
6
+ from helm.common.request import (
7
+ Request,
8
+ RequestResult,
9
+ Token,
10
+ wrap_request_time,
11
+ EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
12
+ GeneratedOutput,
13
+ )
14
+
15
+ from helm.clients.client import CachingClient
16
+ from helm.common.optional_dependencies import handle_module_not_found_error
17
+ from typing import TypeVar, Generic
18
+ from typing import Any, Dict, List
19
+ from threading import Semaphore, Lock
20
+ import threading
21
+
22
+ try:
23
+ from ibm_watsonx_ai import Credentials
24
+ from ibm_watsonx_ai.foundation_models import ModelInference
25
+ from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
26
+ from ibm_watsonx_ai.foundation_models.schema import (
27
+ TextChatParameters,
28
+ TextGenParameters,
29
+ ReturnOptionProperties,
30
+ )
31
+
32
+ except ModuleNotFoundError as e:
33
+ handle_module_not_found_error(e, ["ibm"])
34
+
35
+ # Maximum number of concurrent requests per model; parallel executions are limited by the IBM API
36
+ MAX_CONCURRENT_REQUESTS = 8
37
+ __semaphores: Dict[str, Semaphore] = dict()
38
+ __semaphores_lock = Lock()
39
+
40
+
41
def _get_semaphore(model: str) -> Semaphore:
    """Return the semaphore registered for `model`, creating it on first request.

    A newly created semaphore is initialised with MAX_CONCURRENT_REQUESTS
    permits. The lookup-and-create step runs while holding the module lock.
    """
    with __semaphores_lock:
        is_missing = model not in __semaphores
        if is_missing:
            __semaphores[model] = threading.Semaphore(MAX_CONCURRENT_REQUESTS)

    return __semaphores[model]
47
+
48
+
49
+ T = TypeVar("T", TextGenParameters, TextChatParameters)
50
+
51
+
52
class ModelInferenceHandler(ABC, Generic[T]):
    """Abstract handler pairing an inference engine with a parameter type `T`.

    Subclasses build the parameters for a request, send the prompt to the
    engine, and convert the raw response into `GeneratedOutput` objects.
    """

    @abstractmethod
    def __init__(self, inference_engine: ModelInference):
        """Keep a reference to the inference engine.

        :type inference_engine: object
        """
        self.inference_engine = inference_engine

    @abstractmethod
    def serve_request(self, prompt: str, params: T) -> Dict:
        """Send `prompt` with `params` to the inference engine and return its response."""

    @abstractmethod
    def parse_response(self, response: dict) -> List[GeneratedOutput]:
        """Convert a raw engine response into a list of completions."""

    @abstractmethod
    def create_params(self, request: Request) -> T:
        """Build the engine parameters for `request`."""
71
+
72
+
73
class GenerateInferenceHandler(ModelInferenceHandler[TextGenParameters]):
    """Handler that serves requests through the engine's `generate` call."""

    def __init__(self, inference_engine: ModelInference):
        self.inference_engine = inference_engine

    def create_params(self, request: Request) -> TextGenParameters:
        """Translate `request` into `TextGenParameters`.

        The temperature is 0.05 for ibm/granite-13b-instruct-v2, 1e-7 when the
        request asks for temperature 0, and the requested value otherwise.
        """
        # Default temperature 0.05 required by ibm/granite-13b-instruct-v2
        if self.inference_engine.model_id == "ibm/granite-13b-instruct-v2":
            temperature = 0.05
        elif request.temperature == 0:
            temperature = 1e-7
        else:
            temperature = request.temperature

        return_options = ReturnOptionProperties(
            input_text=True,
            generated_tokens=True,
            input_tokens=False,
            token_logprobs=True,
            token_ranks=False,
        )
        return TextGenParameters(
            temperature=temperature,
            top_p=request.top_p,
            max_new_tokens=request.max_tokens,
            return_options=return_options,
            include_stop_sequence=False,
            prompt_variables=None,
        )

    def serve_request(self, prompt: str, params: TextGenParameters) -> Dict:
        """Call `generate` on the engine while holding the model's semaphore."""
        with _get_semaphore(self.inference_engine.model_id):
            return self.inference_engine.generate(
                prompt=prompt,
                params=params,
            )

    def parse_response(self, response: dict) -> List[GeneratedOutput]:
        """Build one `GeneratedOutput` per entry in `response["results"]`.

        Any exception raised while parsing is logged and parsing stops; the
        completions collected up to that point are returned.
        """
        parsed: List[GeneratedOutput] = []
        try:
            for result in response["results"]:
                total_logprob: float = 0
                token_list: List[Token] = []
                result_text = result["generated_text"]
                for entry in result["generated_tokens"]:
                    # A token without a "logprob" key contributes 0.
                    entry_logprob = entry.get("logprob", 0)
                    entry_text = entry["text"]
                    token_list.append(Token(text=entry_text, logprob=entry_logprob))
                    total_logprob += entry_logprob

                parsed.append(GeneratedOutput(text=result_text, logprob=total_logprob, tokens=token_list))
        except Exception as e:
            hlog(f"GenerateInferenceHandler failed with exception {e} during parse_response {response}")
        return parsed
128
+
129
+
130
class ChatModelInferenceHandler(ModelInferenceHandler[TextChatParameters]):
    """Handler that serves requests through the engine's `chat` call."""

    def __init__(self, inference_engine: ModelInference):
        self.inference_engine = inference_engine

    def create_params(self, request: Request) -> TextChatParameters:
        """Translate `request` into `TextChatParameters` with logprobs enabled."""
        chat_params = TextChatParameters(
            logprobs=True,
            presence_penalty=0,
            frequency_penalty=0,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            top_p=request.top_p,
        )
        return chat_params

    def parse_response(self, response: dict) -> List[GeneratedOutput]:
        """Build one `GeneratedOutput` per entry in `response["choices"]`.

        Any exception raised while parsing is logged and parsing stops; the
        completions collected up to that point are returned.
        """
        parsed: List[GeneratedOutput] = []
        try:
            for choice in response["choices"]:
                total_logprob: float = 0
                token_list: List[Token] = []
                message_text = choice["message"]["content"]

                for entry in choice["logprobs"]["content"]:
                    entry_logprob = entry["logprob"]
                    entry_text = entry["token"]
                    token_list.append(Token(text=entry_text, logprob=entry_logprob))
                    total_logprob += entry_logprob

                parsed.append(GeneratedOutput(text=message_text, logprob=total_logprob, tokens=token_list))
        except Exception as e:
            hlog(f"ChatModelInferenceHandler failed with exception {e} during parse_response {response}")
        return parsed

    def serve_request(self, prompt: str, params: TextChatParameters) -> Dict:
        """Send `prompt` as a single user message while holding the model's semaphore."""
        with _get_semaphore(self.inference_engine.model_id):
            return self.inference_engine.chat(
                messages=[{"role": "user", "content": prompt}],
                params=params,
            )
173
+
174
+
175
class IbmClient(CachingClient, ABC):
    """Base client for IBM models; subclasses choose the inference handler in `make_request`."""

    def __init__(
        self,
        cache_config: CacheConfig,
        api_key: str,
        region: str,
        location: List[Dict[str, Any]],
        watsonx_model_name: str,
        **kwargs,
    ):
        """Resolve the region configuration and create the inference engine.

        :param cache_config: cache configuration forwarded to `CachingClient`.
        :param api_key: API key used to build the credentials.
        :param region: region name, matched case-insensitively against `location`.
        :param location: per-region entries, each with 'region', 'url' and
            'project_id' keys (annotation corrected: this is iterated as a
            sequence of mappings, not used as a single dict).
        :param watsonx_model_name: model id passed to `ModelInference`.
        :param kwargs: extra keyword arguments, stored on `self.kwargs`.
        :raises AssertionError: if no entry supplies a project id or url for `region`.
        """
        super().__init__(cache_config=cache_config)
        self.project_id = None
        self.url = None
        self.watsonx_model_name = watsonx_model_name
        self.api_key = api_key
        self.region = region
        self.kwargs = kwargs
        # No early exit: if several entries match the region, the last one wins.
        for entry in location:
            if entry["region"].lower() == self.region.lower():
                self.project_id = entry["project_id"]
                self.url = entry["url"]

        assert self.project_id is not None, (
            "Missed project_id for specified region configuration in credentials.conf, should be in list "
            "of JSON objects with 'region', 'url', 'project_id' per region"
        )
        assert self.url is not None, (
            "Missed url for specified region configuration in credentials.conf, should be in list "
            "of JSON objects with 'region', 'url', 'project_id' per region"
        )

        self.inference_engine = ModelInference(
            model_id=self.watsonx_model_name,
            params={GenParams.MAX_NEW_TOKENS: 2000},
            credentials=Credentials(api_key=api_key, url=self.url),
            project_id=self.project_id,
        )

        hlog("Started IBM Client")

    @abstractmethod
    def make_request(self, request: Request) -> RequestResult:
        """Serve `request`; implemented by the concrete clients."""
        pass

    def do_call(self, inference_handler: ModelInferenceHandler, request: Request) -> RequestResult:
        """Serve `request` through `inference_handler`, going through the cache.

        The cache key is built from the prompt, the handler's parameters (as a
        dict) and the requested model name. Exceptions are not caught here.
        """
        params = inference_handler.create_params(request=request)

        def do_it() -> Dict[str, Any]:
            return inference_handler.serve_request(prompt=request.prompt, params=params)

        raw_request = {"prompt": request.prompt, "params": params.to_dict(), "model": request.model}

        cache_key = CachingClient.make_cache_key(raw_request, request)
        response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
        completions = inference_handler.parse_response(response)
        return RequestResult(
            success=True,
            cached=cached,
            request_time=response["request_time"],
            request_datetime=response.get("request_datetime"),
            completions=completions,
            embedding=[],
        )
238
+
239
+
240
class IbmChatClient(IbmClient):
    """IBM client that serves requests through `ChatModelInferenceHandler`."""

    def make_request(self, request: Request) -> RequestResult:
        """Serve `request`; any exception becomes an unsuccessful `RequestResult`."""
        if request.embedding:
            # Embedding not supported for this model
            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT

        try:
            chat_handler = ChatModelInferenceHandler(inference_engine=self.inference_engine)
            return self.do_call(inference_handler=chat_handler, request=request)
        except Exception as e:
            error: str = f"IBM Chat client Model error: {e}"
            return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
254
+
255
+
256
class IbmTextClient(IbmClient):
    """IBM client that serves requests through `GenerateInferenceHandler`."""

    def make_request(self, request: Request) -> RequestResult:
        """Serve `request`; any exception becomes an unsuccessful `RequestResult`."""
        if request.embedding:
            # Embedding not supported for this model
            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT

        try:
            text_handler = GenerateInferenceHandler(inference_engine=self.inference_engine)
            return self.do_call(inference_handler=text_handler, request=request)
        except Exception as e:
            error: str = f"IBM Text client Model error: {e}"
            return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
9
9
  DecodeRequestResult,
10
10
  )
11
11
  from helm.clients.client import Client, CachingClient
12
- from .image_generation_client_utils import get_single_image_multimedia_object
12
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
13
13
 
14
14
 
15
15
  class AdobeVisionClient(Client):
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
9
9
  DecodeRequestResult,
10
10
  )
11
11
  from helm.clients.client import Client, CachingClient
12
- from .image_generation_client_utils import get_single_image_multimedia_object
12
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
13
13
 
14
14
 
15
15
  class AlephAlphaImageGenerationClient(Client):
@@ -6,9 +6,9 @@
6
6
  @Contact : dm18@mails.tsinghua.edu.cn
7
7
  """
8
8
 
9
- from .direct_sr import DirectSuperResolution
10
- from .iterative_sr import IterativeSuperResolution
11
- from .sr_group import SRGroup
9
+ from helm.clients.image_generation.cogview2.sr_pipeline.direct_sr import DirectSuperResolution
10
+ from helm.clients.image_generation.cogview2.sr_pipeline.iterative_sr import IterativeSuperResolution
11
+ from helm.clients.image_generation.cogview2.sr_pipeline.sr_group import SRGroup
12
12
 
13
13
  DirectSuperResolution
14
14
  IterativeSuperResolution
@@ -10,8 +10,11 @@
10
10
  import torch
11
11
  from icetk import icetk as tokenizer
12
12
 
13
- from .dsr_sampling import filling_sequence_dsr, IterativeEntfilterStrategy
14
- from .dsr_model import DsrModel
13
+ from helm.clients.image_generation.cogview2.sr_pipeline.dsr_sampling import (
14
+ filling_sequence_dsr,
15
+ IterativeEntfilterStrategy,
16
+ )
17
+ from helm.clients.image_generation.cogview2.sr_pipeline.dsr_model import DsrModel
15
18
  from helm.common.optional_dependencies import handle_module_not_found_error
16
19
 
17
20
 
@@ -8,8 +8,11 @@
8
8
  import torch
9
9
  from icetk import icetk as tokenizer
10
10
 
11
- from .itersr_sampling import filling_sequence_itersr, IterativeEntfilterStrategy
12
- from .itersr_model import ItersrModel
11
+ from helm.clients.image_generation.cogview2.sr_pipeline.itersr_sampling import (
12
+ filling_sequence_itersr,
13
+ IterativeEntfilterStrategy,
14
+ )
15
+ from helm.clients.image_generation.cogview2.sr_pipeline.itersr_model import ItersrModel
13
16
  from helm.common.optional_dependencies import handle_module_not_found_error
14
17
 
15
18
 
@@ -7,8 +7,8 @@
7
7
  """
8
8
 
9
9
  # here put the import lib
10
- from .direct_sr import DirectSuperResolution
11
- from .iterative_sr import IterativeSuperResolution
10
+ from helm.clients.image_generation.cogview2.sr_pipeline.direct_sr import DirectSuperResolution
11
+ from helm.clients.image_generation.cogview2.sr_pipeline.iterative_sr import IterativeSuperResolution
12
12
 
13
13
  from helm.common.optional_dependencies import handle_module_not_found_error
14
14
 
@@ -20,7 +20,7 @@ from helm.common.tokenization_request import (
20
20
  )
21
21
  from helm.clients.client import Client, CachingClient
22
22
  from helm.clients.image_generation.cogview2.coglm_strategy import CoglmStrategy
23
- from .image_generation_client_utils import get_single_image_multimedia_object
23
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
24
24
 
25
25
 
26
26
  class CogView2Client(Client):
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
15
15
  )
16
16
  from helm.clients.moderation_api_client import ModerationAPIClient
17
17
  from helm.clients.client import Client, CachingClient
18
- from .image_generation_client_utils import get_single_image_multimedia_object
18
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
19
19
 
20
20
  try:
21
21
  import openai
@@ -7,8 +7,8 @@ from helm.common.optional_dependencies import handle_module_not_found_error
7
7
  from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
8
8
  from helm.clients.moderation_api_client import ModerationAPIClient
9
9
  from helm.clients.client import CachingClient
10
- from .dalle2_client import DALLE2Client
11
- from .image_generation_client_utils import get_single_image_multimedia_object
10
+ from helm.clients.image_generation.dalle2_client import DALLE2Client
11
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
12
12
 
13
13
  try:
14
14
  import openai
@@ -1,3 +1,3 @@
1
1
  __version__ = "0.1.4"
2
2
 
3
- from .model import DalleBart, DalleBartProcessor
3
+ from helm.clients.image_generation.dalle_mini.model import DalleBart, DalleBartProcessor
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  import numpy as np
7
7
  from datasets import Dataset, load_dataset
8
8
 
9
- from .model.text import TextNormalizer
9
+ from helm.clients.image_generation.dalle_mini.model.text import TextNormalizer
10
10
  from helm.common.optional_dependencies import handle_module_not_found_error
11
11
 
12
12
  try:
@@ -1,5 +1,5 @@
1
- from .configuration import DalleBartConfig
2
- from .modeling import DalleBart
3
- from .partitions import set_partitions
4
- from .processor import DalleBartProcessor
5
- from .tokenizer import DalleBartTokenizer
1
+ from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
2
+ from helm.clients.image_generation.dalle_mini.model.modeling import DalleBart
3
+ from helm.clients.image_generation.dalle_mini.model.partitions import set_partitions
4
+ from helm.clients.image_generation.dalle_mini.model.processor import DalleBartProcessor
5
+ from helm.clients.image_generation.dalle_mini.model.tokenizer import DalleBartTokenizer
@@ -12,13 +12,13 @@
12
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
- """ DalleBart model configuration """
15
+ """DalleBart model configuration"""
16
16
  import warnings
17
17
 
18
18
  from transformers.configuration_utils import PretrainedConfig
19
19
  from transformers.utils import logging
20
20
 
21
- from .utils import PretrainedFromWandbMixin
21
+ from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
22
22
 
23
23
  logger = logging.get_logger(__name__)
24
24
 
@@ -12,7 +12,7 @@
12
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
- """ DalleBart model. """
15
+ """DalleBart model."""
16
16
 
17
17
  import math
18
18
  from functools import partial
@@ -35,8 +35,8 @@ from transformers.utils import ModelOutput, logging
35
35
  from transformers.generation.configuration_utils import GenerationConfig
36
36
 
37
37
  from helm.common.optional_dependencies import handle_module_not_found_error
38
- from .configuration import DalleBartConfig
39
- from .utils import PretrainedFromWandbMixin
38
+ from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
39
+ from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
40
40
 
41
41
  try:
42
42
  import flax
@@ -1,11 +1,11 @@
1
- """ DalleBart processor """
1
+ """DalleBart processor"""
2
2
 
3
3
  from typing import List
4
4
 
5
- from .configuration import DalleBartConfig
6
- from .text import TextNormalizer
7
- from .tokenizer import DalleBartTokenizer
8
- from .utils import PretrainedFromWandbMixin
5
+ from helm.clients.image_generation.dalle_mini.model.configuration import DalleBartConfig
6
+ from helm.clients.image_generation.dalle_mini.model.text import TextNormalizer
7
+ from helm.clients.image_generation.dalle_mini.model.tokenizer import DalleBartTokenizer
8
+ from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
9
9
  from helm.common.optional_dependencies import handle_module_not_found_error
10
10
 
11
11
 
@@ -1,8 +1,8 @@
1
- """ DalleBart tokenizer """
1
+ """DalleBart tokenizer"""
2
2
 
3
3
  from transformers import BartTokenizerFast
4
4
 
5
- from .utils import PretrainedFromWandbMixin
5
+ from helm.clients.image_generation.dalle_mini.model.utils import PretrainedFromWandbMixin
6
6
 
7
7
 
8
8
  class DalleBartTokenizer(PretrainedFromWandbMixin, BartTokenizerFast):
@@ -1 +1 @@
1
- from . import *
1
+ from helm.clients.image_generation.dalle_mini.vqgan_jax import *
@@ -2,8 +2,8 @@ import re
2
2
 
3
3
  import torch
4
4
 
5
- from .modeling_flax_vqgan import VQModel
6
- from .configuration_vqgan import VQGANConfig
5
+ from helm.clients.image_generation.dalle_mini.vqgan_jax.modeling_flax_vqgan import VQModel
6
+ from helm.clients.image_generation.dalle_mini.vqgan_jax.configuration_vqgan import VQGANConfig
7
7
  from helm.common.optional_dependencies import handle_module_not_found_error
8
8
 
9
9
  try:
@@ -6,7 +6,7 @@ import math
6
6
 
7
7
  from transformers.modeling_flax_utils import FlaxPreTrainedModel
8
8
 
9
- from .configuration_vqgan import VQGANConfig
9
+ from helm.clients.image_generation.dalle_mini.vqgan_jax.configuration_vqgan import VQGANConfig
10
10
  from helm.common.optional_dependencies import handle_module_not_found_error
11
11
 
12
12
  try:
@@ -15,7 +15,7 @@ from helm.common.tokenization_request import (
15
15
  TokenizationRequestResult,
16
16
  )
17
17
  from helm.clients.client import Client, CachingClient
18
- from .image_generation_client_utils import get_single_image_multimedia_object
18
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
19
19
 
20
20
 
21
21
  class DALLEMiniClient(Client):
@@ -9,7 +9,7 @@ from helm.common.tokenization_request import (
9
9
  DecodeRequestResult,
10
10
  )
11
11
  from helm.clients.client import Client, CachingClient
12
- from .image_generation_client_utils import get_single_image_multimedia_object
12
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
13
13
 
14
14
 
15
15
  class DeepFloydClient(Client):
@@ -17,7 +17,7 @@ from helm.common.tokenization_request import (
17
17
  TokenizationRequestResult,
18
18
  )
19
19
  from helm.clients.client import Client, CachingClient
20
- from .image_generation_client_utils import get_single_image_multimedia_object
20
+ from helm.clients.image_generation.image_generation_client_utils import get_single_image_multimedia_object
21
21
 
22
22
 
23
23
  _models_lock: Lock = Lock()