crfm-helm: crfm_helm-0.5.3-py3-none-any.whl → crfm_helm-0.5.5-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (606)
  1. crfm_helm-0.5.5.dist-info/METADATA +413 -0
  2. crfm_helm-0.5.5.dist-info/RECORD +894 -0
  3. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  22. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  23. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  24. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  25. helm/benchmark/adaptation/prompt.py +1 -1
  26. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  27. helm/benchmark/annotation/air_bench_annotator.py +20 -5
  28. helm/benchmark/annotation/annotator.py +5 -0
  29. helm/benchmark/annotation/annotator_factory.py +3 -20
  30. helm/benchmark/annotation/anthropic_red_team_annotator.py +11 -24
  31. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  32. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  33. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  34. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  35. helm/benchmark/annotation/call_center_annotator.py +22 -11
  36. helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
  37. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  38. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  39. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  40. helm/benchmark/annotation/harm_bench_annotator.py +11 -24
  41. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  42. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  43. helm/benchmark/annotation/live_qa_annotator.py +10 -5
  44. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  45. helm/benchmark/annotation/medalign_annotator.py +100 -0
  46. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  47. helm/benchmark/annotation/medication_qa_annotator.py +90 -61
  48. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  49. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  50. helm/benchmark/annotation/model_as_judge.py +281 -18
  51. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  52. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  53. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  54. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  55. helm/benchmark/annotation/omni_math_annotator.py +132 -0
  56. helm/benchmark/annotation/simple_safety_tests_annotator.py +11 -25
  57. helm/benchmark/annotation/spider_annotator.py +18 -0
  58. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  59. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  60. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  61. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  62. helm/benchmark/annotation/xstest_annotator.py +20 -30
  63. helm/benchmark/annotation_executor.py +35 -15
  64. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  65. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  66. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  67. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  68. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  69. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  70. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  71. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  72. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  73. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  74. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  75. helm/benchmark/augmentations/perturbation.py +1 -1
  76. helm/benchmark/augmentations/space_perturbation.py +2 -2
  77. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  78. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  79. helm/benchmark/augmentations/test_perturbation.py +16 -13
  80. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  81. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  82. helm/benchmark/data_preprocessor.py +2 -2
  83. helm/benchmark/huggingface_registration.py +2 -7
  84. helm/benchmark/metrics/aci_bench_metrics.py +34 -0
  85. helm/benchmark/metrics/basic_metrics.py +6 -6
  86. helm/benchmark/metrics/bbq_metrics.py +2 -2
  87. helm/benchmark/metrics/bias_metrics.py +12 -3
  88. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  89. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  90. helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
  91. helm/benchmark/metrics/classification_metrics.py +76 -12
  92. helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
  93. helm/benchmark/metrics/code_metrics.py +5 -5
  94. helm/benchmark/metrics/comet_metric.py +125 -0
  95. helm/benchmark/metrics/common_metric_specs.py +9 -2
  96. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  97. helm/benchmark/metrics/copyright_metrics.py +4 -4
  98. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  99. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  100. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  101. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  102. helm/benchmark/metrics/dischargeme_metrics.py +34 -0
  103. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  104. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  105. helm/benchmark/metrics/efficiency_metrics.py +3 -3
  106. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  107. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  108. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  109. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  110. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  111. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  112. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  113. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  114. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  115. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  116. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  117. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  118. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  120. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  121. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  122. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  123. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  124. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  125. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  126. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  127. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  128. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  129. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  130. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  131. helm/benchmark/metrics/med_dialog_metrics.py +34 -0
  132. helm/benchmark/metrics/medalign_metrics.py +34 -0
  133. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  134. helm/benchmark/metrics/medec_metrics.py +101 -0
  135. helm/benchmark/metrics/medi_qa_metrics.py +34 -0
  136. helm/benchmark/metrics/medication_qa_metrics.py +15 -4
  137. helm/benchmark/metrics/mental_health_metrics.py +34 -0
  138. helm/benchmark/metrics/metric.py +3 -3
  139. helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
  140. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  141. helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
  142. helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
  143. helm/benchmark/metrics/nltk_helper.py +32 -0
  144. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  145. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  146. helm/benchmark/metrics/output_processing_metric.py +60 -0
  147. helm/benchmark/metrics/output_processors.py +15 -0
  148. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  149. helm/benchmark/metrics/ranking_metrics.py +3 -3
  150. helm/benchmark/metrics/reference_metric.py +3 -3
  151. helm/benchmark/metrics/safety_metrics.py +39 -17
  152. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  153. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  154. helm/benchmark/metrics/spider_metrics.py +7 -0
  155. helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
  156. helm/benchmark/metrics/statistic.py +1 -1
  157. helm/benchmark/metrics/summac/model_summac.py +1 -1
  158. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  159. helm/benchmark/metrics/summarization_metrics.py +19 -9
  160. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  161. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  162. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  163. helm/benchmark/metrics/test_metric.py +1 -1
  164. helm/benchmark/metrics/test_statistic.py +2 -2
  165. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  166. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  167. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  168. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  169. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  170. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  171. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  172. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
  173. helm/benchmark/metrics/toxicity_metrics.py +4 -4
  174. helm/benchmark/metrics/unitxt_metrics.py +21 -4
  175. helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
  176. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  177. helm/benchmark/model_metadata_registry.py +16 -0
  178. helm/benchmark/presentation/create_plots.py +1 -1
  179. helm/benchmark/presentation/schema.py +3 -0
  180. helm/benchmark/presentation/summarize.py +119 -256
  181. helm/benchmark/presentation/test_summarize.py +145 -3
  182. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  183. helm/benchmark/reeval_run.py +203 -0
  184. helm/benchmark/reeval_runner.py +355 -0
  185. helm/benchmark/run.py +8 -17
  186. helm/benchmark/run_expander.py +105 -8
  187. helm/benchmark/run_spec_factory.py +12 -0
  188. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  189. helm/benchmark/run_specs/audio_run_specs.py +613 -0
  190. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  191. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  192. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  193. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  194. helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
  195. helm/benchmark/run_specs/experimental_run_specs.py +112 -3
  196. helm/benchmark/run_specs/finance_run_specs.py +6 -2
  197. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  198. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  199. helm/benchmark/run_specs/long_context_run_specs.py +89 -0
  200. helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
  201. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  202. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  203. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  204. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +66 -52
  205. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  206. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  207. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  208. helm/benchmark/run_specs/vlm_run_specs.py +83 -5
  209. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  210. helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
  211. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  212. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  213. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  214. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  215. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
  216. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  217. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  218. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  219. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  220. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  221. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  222. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  223. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  224. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  225. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  226. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  227. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  228. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  229. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  230. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  231. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  232. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  233. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  234. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  235. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
  236. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  237. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
  238. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  239. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  240. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  241. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  242. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  243. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  244. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  245. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  246. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  247. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  248. helm/benchmark/scenarios/bold_scenario.py +1 -1
  249. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  250. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  251. helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
  252. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  253. helm/benchmark/scenarios/clear_scenario.py +153 -0
  254. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  255. helm/benchmark/scenarios/code_scenario.py +17 -4
  256. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  257. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  258. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  259. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  260. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  261. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  262. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  263. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  264. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  265. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  266. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  267. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  268. helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
  269. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  270. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  271. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  272. helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
  273. helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
  274. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  275. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  276. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  277. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  278. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  279. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  280. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  281. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  282. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  283. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  284. helm/benchmark/scenarios/headqa_scenario.py +131 -0
  285. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  286. helm/benchmark/scenarios/ice_scenario.py +8 -4
  287. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  288. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  289. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  290. helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
  291. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  292. helm/benchmark/scenarios/koala_scenario.py +1 -1
  293. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  294. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  295. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  296. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  297. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  298. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  299. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  300. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  301. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  302. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  303. helm/benchmark/scenarios/math_scenario.py +9 -1
  304. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  305. helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
  306. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  307. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  308. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  309. helm/benchmark/scenarios/medalign_scenario.py +88 -0
  310. helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
  311. helm/benchmark/scenarios/medbullets_scenario.py +140 -0
  312. helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
  313. helm/benchmark/scenarios/medec_scenario.py +120 -0
  314. helm/benchmark/scenarios/medhallu_scenario.py +66 -0
  315. helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
  316. helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
  317. helm/benchmark/scenarios/mental_health_scenario.py +112 -0
  318. helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
  319. helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
  320. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
  321. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  322. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  323. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  324. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  325. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
  326. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
  327. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
  328. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  329. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  330. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  331. helm/benchmark/scenarios/numeracy_scenario.py +10 -1
  332. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  333. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  334. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  335. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  336. helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
  337. helm/benchmark/scenarios/quac_scenario.py +10 -1
  338. helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
  339. helm/benchmark/scenarios/raft_scenario.py +18 -3
  340. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  341. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  342. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  343. helm/benchmark/scenarios/scenario.py +9 -1
  344. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +233 -84
  345. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  346. helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
  347. helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
  348. helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
  349. helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
  350. helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
  351. helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
  352. helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
  353. helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
  354. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  355. helm/benchmark/scenarios/spider_scenario.py +91 -0
  356. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
  357. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  358. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  359. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  360. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  361. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  362. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  363. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  364. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  365. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  366. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  367. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  368. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  369. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  370. helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
  371. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  372. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  373. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  374. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  375. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  376. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  377. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  378. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  379. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  380. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  381. helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
  382. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  383. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  384. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  385. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  386. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  387. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  388. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  389. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  390. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  391. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  392. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  393. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  394. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  395. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  396. helm/benchmark/server.py +11 -0
  397. helm/benchmark/slurm_runner.py +1 -1
  398. helm/benchmark/static/schema_audio.yaml +752 -0
  399. helm/benchmark/static/schema_autobencher.yaml +150 -0
  400. helm/benchmark/static/schema_call_center.yaml +97 -60
  401. helm/benchmark/static/schema_capabilities.yaml +254 -0
  402. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  403. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  404. helm/benchmark/static/schema_enterprise.yaml +298 -0
  405. helm/benchmark/static/schema_finance.yaml +14 -12
  406. helm/benchmark/static/schema_heim.yaml +1389 -0
  407. helm/benchmark/static/schema_legal.yaml +566 -0
  408. helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
  409. helm/benchmark/static/schema_medhelm.yaml +1081 -0
  410. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  411. helm/benchmark/static/schema_safety.yaml +42 -6
  412. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +40 -26
  413. helm/benchmark/static/schema_social_audio.yaml +224 -0
  414. helm/benchmark/static/schema_sql.yaml +171 -0
  415. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +187 -30
  416. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  417. helm/benchmark/static/schema_vhelm.yaml +151 -47
  418. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  419. helm/benchmark/static_build/assets/index-262903c1.js +10 -0
  420. helm/benchmark/static_build/assets/index-42060d71.css +1 -0
  421. helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
  422. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  423. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  424. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
  425. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  426. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  427. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  428. helm/benchmark/static_build/config.js +1 -1
  429. helm/benchmark/static_build/index.html +5 -5
  430. helm/benchmark/window_services/default_window_service.py +1 -1
  431. helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
  432. helm/benchmark/window_services/ice_window_service.py +1 -1
  433. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  434. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  435. helm/benchmark/window_services/local_window_service.py +2 -2
  436. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  437. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  438. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  439. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  440. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  441. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  442. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  443. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  444. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  445. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  446. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  447. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  448. helm/benchmark/window_services/test_utils.py +1 -1
  449. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  450. helm/benchmark/window_services/tokenizer_service.py +0 -5
  451. helm/benchmark/window_services/yalm_window_service.py +1 -1
  452. helm/clients/ai21_client.py +3 -3
  453. helm/clients/aleph_alpha_client.py +1 -1
  454. helm/clients/audio_language/__init__.py +0 -0
  455. helm/clients/audio_language/diva_llama_client.py +118 -0
  456. helm/clients/audio_language/llama_omni_client.py +198 -0
  457. helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
  458. helm/clients/audio_language/qwen_audiolm_client.py +150 -0
  459. helm/clients/auto_client.py +4 -2
  460. helm/clients/azure_openai_client.py +55 -0
  461. helm/clients/bedrock_client.py +201 -7
  462. helm/clients/bedrock_utils.py +33 -0
  463. helm/clients/clip_scorers/clip_scorer.py +1 -1
  464. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  465. helm/clients/cohere_client.py +3 -3
  466. helm/clients/google_client.py +1 -1
  467. helm/clients/http_model_client.py +1 -1
  468. helm/clients/huggingface_client.py +10 -18
  469. helm/clients/ibm_client.py +267 -0
  470. helm/clients/image_generation/adobe_vision_client.py +1 -1
  471. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  472. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  473. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  474. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  475. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  476. helm/clients/image_generation/cogview2_client.py +1 -1
  477. helm/clients/image_generation/dalle2_client.py +1 -1
  478. helm/clients/image_generation/dalle3_client.py +2 -2
  479. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  480. helm/clients/image_generation/dalle_mini/data.py +1 -1
  481. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  482. helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
  483. helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
  484. helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
  485. helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
  486. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  487. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  488. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  489. helm/clients/image_generation/dalle_mini_client.py +1 -1
  490. helm/clients/image_generation/deep_floyd_client.py +1 -1
  491. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  492. helm/clients/image_generation/lexica_client.py +1 -1
  493. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  494. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  495. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  496. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  497. helm/clients/image_generation/mindalle_client.py +1 -1
  498. helm/clients/image_generation/together_image_generation_client.py +1 -1
  499. helm/clients/lit_gpt_client.py +2 -2
  500. helm/clients/mistral_client.py +62 -18
  501. helm/clients/nvidia_nim_client.py +0 -3
  502. helm/clients/openai_client.py +255 -21
  503. helm/clients/palmyra_client.py +2 -6
  504. helm/clients/reka_client.py +1 -1
  505. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  506. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  507. helm/clients/stanfordhealthcare_google_client.py +43 -0
  508. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  509. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  510. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  511. helm/clients/test_client.py +1 -1
  512. helm/clients/test_together_client.py +6 -1
  513. helm/clients/together_client.py +69 -7
  514. helm/clients/upstage_client.py +23 -0
  515. helm/clients/vertexai_client.py +39 -13
  516. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  517. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  518. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  519. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  520. helm/clients/vision_language/qwen2_vlm_client.py +175 -0
  521. helm/clients/vllm_client.py +4 -6
  522. helm/clients/yi_client.py +0 -3
  523. helm/common/audio_utils.py +111 -0
  524. helm/common/cache.py +8 -30
  525. helm/common/file_caches/local_file_cache.py +1 -1
  526. helm/common/file_caches/test_local_file_cache.py +1 -1
  527. helm/common/images_utils.py +2 -2
  528. helm/common/key_value_store.py +9 -9
  529. helm/common/media_object.py +2 -2
  530. helm/common/mongo_key_value_store.py +3 -3
  531. helm/common/multimodal_request_utils.py +26 -0
  532. helm/common/reeval_parameters.py +12 -0
  533. helm/common/request.py +6 -2
  534. helm/common/response_format.py +18 -0
  535. helm/common/test_cache.py +1 -48
  536. helm/common/test_media_object.py +1 -1
  537. helm/common/tokenization_request.py +0 -9
  538. helm/config/model_deployments.yaml +1258 -33
  539. helm/config/model_metadata.yaml +1110 -41
  540. helm/config/tokenizer_configs.yaml +403 -3
  541. helm/proxy/cli.py +2 -2
  542. helm/proxy/example_queries.py +1 -1
  543. helm/proxy/server.py +11 -13
  544. helm/proxy/services/remote_service.py +1 -7
  545. helm/proxy/services/server_service.py +6 -19
  546. helm/proxy/services/service.py +0 -6
  547. helm/proxy/services/test_remote_service.py +2 -2
  548. helm/proxy/services/test_service.py +1 -1
  549. helm/proxy/static/general.js +122 -0
  550. helm/proxy/static/help.html +99 -0
  551. helm/proxy/static/index.css +57 -0
  552. helm/proxy/static/index.html +40 -0
  553. helm/proxy/static/index.js +456 -0
  554. helm/proxy/static/info-icon.png +0 -0
  555. helm/proxy/test_retry.py +1 -1
  556. helm/proxy/token_counters/auto_token_counter.py +1 -1
  557. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  558. helm/tokenizers/caching_tokenizer.py +2 -30
  559. helm/tokenizers/http_model_tokenizer.py +1 -1
  560. helm/tokenizers/huggingface_tokenizer.py +2 -2
  561. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  562. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  563. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  564. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  565. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  566. helm/tokenizers/tokenizer.py +3 -1
  567. helm/tokenizers/yalm_tokenizer.py +3 -3
  568. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  569. crfm_helm-0.5.3.dist-info/METADATA +0 -355
  570. crfm_helm-0.5.3.dist-info/RECORD +0 -699
  571. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  572. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  573. helm/benchmark/data_overlap/light_scenario.py +0 -60
  574. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  575. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  576. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  577. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  578. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  579. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  580. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  581. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  582. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  583. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  584. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  585. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  586. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  587. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  588. helm/benchmark/static_build/assets/index-58f97dcd.js +0 -10
  589. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  590. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  591. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  592. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  593. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  594. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  595. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  596. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  597. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  598. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  599. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  600. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  601. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  602. helm/tokenizers/anthropic_tokenizer.py +0 -52
  603. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
  604. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
  605. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
  606. /helm/benchmark/{data_overlap → metrics/ifeval}/__init__.py +0 -0
@@ -0,0 +1,43 @@
1
+ from typing import Any, Dict, List
2
+
3
+ from helm.common.request import (
4
+ Request,
5
+ GeneratedOutput,
6
+ )
7
+ from helm.clients.stanfordhealthcare_http_model_client import StanfordHealthCareHTTPModelClient
8
+
9
+
10
class StanfordHealthCareGoogleClient(StanfordHealthCareHTTPModelClient):
    """
    Client for accessing Google models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    def get_request(self, request: Request) -> Dict[str, Any]:
        """Build the JSON payload for `request`.

        The prompt is sent as a single "user" turn; the sampling parameters of
        the request are forwarded under "generation_config". Exactly one
        candidate is requested.
        """
        user_turn: Dict[str, Any] = {
            "role": "user",
            "parts": {"text": request.prompt},
        }
        sampling: Dict[str, Any] = {
            "temperature": request.temperature,
            "topP": request.top_p,
            "topK": request.top_k_per_token,
            "candidateCount": 1,
            "maxOutputTokens": request.max_tokens,
            "stopSequences": request.stop_sequences,
        }
        return {"contents": user_turn, "generation_config": sampling}

    def parse_response(self, response: Dict[str, Any]) -> List[GeneratedOutput]:
        """Concatenate the text of every item in `response["content"]` into one completion.

        For each item only the first candidate is read, and only the first
        part of its content. Items whose first candidate has no "content" key
        are skipped. No log probabilities or tokens are reported.
        """
        pieces = []
        for chunk in response["content"]:
            first_candidate = chunk["candidates"][0]
            if "content" in first_candidate:
                pieces.append(first_candidate["content"]["parts"][0]["text"])
        return [GeneratedOutput(text="".join(pieces), logprob=0, tokens=[])]
@@ -0,0 +1,93 @@
1
+ import requests
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import asdict
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from helm.common.cache import CacheConfig
8
+ from helm.common.request import (
9
+ wrap_request_time,
10
+ Request,
11
+ RequestResult,
12
+ GeneratedOutput,
13
+ EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
14
+ )
15
+ from helm.clients.client import CachingClient
16
+
17
+
18
class StanfordHealthCareHTTPModelClient(CachingClient, ABC):
    """
    Client for accessing Stanford Health Care models via HTTP requests.

    This base class performs the HTTP POST, the optional caching and the
    conversion into a `RequestResult`. Subclasses provide the model-specific
    payload (`get_request`) and response decoding (`parse_response`).

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    # Name of the HTTP header that carries the API key.
    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"

    def __init__(
        self,
        cache_config: CacheConfig,
        deployment: str,
        endpoint: str = "http://localhost:8080",
        do_cache: bool = False,
        timeout: int = 3000,
        api_key: Optional[str] = None,
        model: Optional[str] = None,
    ):
        """
        Args:
            cache_config: Forwarded to `CachingClient`.
            deployment: Path appended to `endpoint` to form the request URL.
            endpoint: Base URL of the model API; a trailing "/" is tolerated.
            do_cache: When true, responses are stored in and served from the cache.
            timeout: Passed as `timeout` to `requests.post`.
            api_key: Value sent in the `CREDENTIAL_HEADER_NAME` header. Required.
            model: Stored on the instance; not used by this base class.

        Raises:
            ValueError: If `api_key` is missing or empty.
        """
        super().__init__(cache_config=cache_config)
        # Explicit check instead of `assert`: assertions are removed under
        # `python -O`, which would let requests go out without credentials.
        if not api_key:
            raise ValueError("API key must be provided")
        self.endpoint = endpoint
        self.timeout = timeout
        self.do_cache = do_cache
        self.deployment = deployment
        self.model = model
        self.default_headers = {StanfordHealthCareHTTPModelClient.CREDENTIAL_HEADER_NAME: api_key}

    def make_request(self, request: Request) -> RequestResult:
        """POST `request` to the deployment and wrap the outcome in a `RequestResult`.

        Embedding requests are not supported and return
        `EMBEDDING_UNAVAILABLE_REQUEST_RESULT` immediately. Any
        `requests.exceptions.RequestException` is reported as an unsuccessful
        result rather than raised.
        """
        if request.embedding:
            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT

        cache_key = asdict(request)
        raw_request = self.get_request(request)
        # The documented endpoint ends in "/"; strip it so the URL does not
        # contain a double slash before the deployment path.
        url = f"{self.endpoint.rstrip('/')}/{self.deployment}"

        def do_it() -> Dict[str, Any]:
            response = requests.post(url, json=raw_request, headers=self.default_headers, timeout=self.timeout)
            response.raise_for_status()
            response_json = response.json()
            # A list body is wrapped so that the result is always a dict
            # (`request_time` is read from it by key further down).
            if isinstance(response_json, list):
                response_json = {"content": response_json}
            return response_json

        try:
            if self.do_cache:
                response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
            else:
                response, cached = wrap_request_time(do_it)(), False

            completions = self.parse_response(response)

            return RequestResult(
                success=True,
                cached=cached,
                error=None,
                completions=completions,
                embedding=[],
                request_time=response["request_time"],
            )
        except requests.exceptions.RequestException as e:
            return RequestResult(success=False, cached=False, error=f"Request error: {e}", completions=[], embedding=[])

    @abstractmethod
    def get_request(self, request: Request) -> Dict[str, Any]:
        """Return the JSON-serializable payload to POST for `request`."""
        pass

    @abstractmethod
    def parse_response(self, response: Dict[str, Any]) -> List[GeneratedOutput]:
        """Turn the decoded JSON response into the list of generated outputs."""
        pass
@@ -0,0 +1,62 @@
1
+ from typing import Optional
2
+
3
+ from helm.clients.openai_client import OpenAIClient
4
+ from helm.common.cache import CacheConfig
5
+ from helm.common.optional_dependencies import handle_module_not_found_error
6
+ from helm.proxy.retry import NonRetriableException
7
+ from helm.tokenizers.tokenizer import Tokenizer
8
+
9
+ try:
10
+ from openai import OpenAI
11
+ except ModuleNotFoundError as e:
12
+ handle_module_not_found_error(e, ["openai"])
13
+
14
+
15
class StanfordHealthCareOpenAIClient(OpenAIClient):
    """
    Client for accessing OpenAI models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    # Name of the HTTP header that carries the API key.
    CREDENTIAL_HEADER_NAME = "Ocp-Apim-Subscription-Key"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        model_name: str,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        base_url: Optional[str] = None,
        openai_model_name: Optional[str] = None,
        output_processor: Optional[str] = None,
    ):
        """
        Args:
            tokenizer, tokenizer_name, cache_config, openai_model_name,
            output_processor: Forwarded unchanged to `OpenAIClient`.
            model_name: Accepted for interface compatibility; not used by this constructor.
            api_key: Sent in the `CREDENTIAL_HEADER_NAME` header. Required.
            endpoint: Substituted for `{endpoint}` in `base_url`. Required.
            base_url: URL template from model_deployments; formatted with
                `endpoint=endpoint`. Required.

        Raises:
            NonRetriableException: If `endpoint`, `api_key` or `base_url` is missing.
        """
        # Validate the configuration before constructing the parent, so a
        # misconfigured deployment fails fast without building anything.
        if not endpoint:
            raise NonRetriableException("Must provide endpoint through credentials.conf")
        if not api_key:
            raise NonRetriableException("Must provide API key through credentials.conf")
        if not base_url:
            raise NonRetriableException("Must provide base url through model_deployments")

        # The parent receives a placeholder key: the real key is sent in a
        # custom header by the client assigned below.
        super().__init__(
            tokenizer=tokenizer,
            tokenizer_name=tokenizer_name,
            cache_config=cache_config,
            api_key="unused",
            openai_model_name=openai_model_name,
            output_processor=output_processor,
        )

        # `base_url` is a template supplied by the model deployment; fill in
        # the endpoint taken from the credentials.
        base_url = base_url.format(endpoint=endpoint)
        # NOTE(review): presumably this replaces the client created by the
        # parent constructor - confirm against OpenAIClient.
        self.client = OpenAI(
            api_key="dummy",
            base_url=base_url,
            default_headers={StanfordHealthCareOpenAIClient.CREDENTIAL_HEADER_NAME: api_key},
        )
@@ -0,0 +1,42 @@
1
+ from typing import Dict, Optional
2
+
3
+ from helm.clients.azure_openai_client import AzureOpenAIClient
4
+ from helm.common.cache import CacheConfig
5
+ from helm.proxy.retry import NonRetriableException
6
+ from helm.tokenizers.tokenizer import Tokenizer
7
+
8
+
9
class StanfordHealthCareSHCOpenAIClient(AzureOpenAIClient):
    """
    Client for accessing OpenAI models hosted on Stanford Health Care's model API.

    Configure by setting the following in prod_env/credentials.conf:

    ```
    stanfordhealthcareEndpoint: https://your-domain-name/
    stanfordhealthcareApiKey: your-private-key
    ```
    """

    # API version string handed to AzureOpenAIClient.
    API_VERSION = "2024-08-01-preview"

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        default_headers: Optional[Dict[str, str]] = None,
    ):
        """Check that an API key was supplied, then delegate to `AzureOpenAIClient`.

        Every argument is forwarded unchanged; the API version is pinned to
        `API_VERSION`.

        Raises:
            NonRetriableException: If `api_key` is missing or empty.
        """
        if not api_key:
            raise NonRetriableException("Must provide API key through credentials.conf")

        pinned_version = StanfordHealthCareSHCOpenAIClient.API_VERSION
        super().__init__(
            tokenizer=tokenizer,
            tokenizer_name=tokenizer_name,
            cache_config=cache_config,
            api_key=api_key,
            endpoint=endpoint,
            api_version=pinned_version,
            default_headers=default_headers,
        )
@@ -1,6 +1,6 @@
1
1
  from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
2
2
  from helm.tokenizers.auto_tokenizer import AutoTokenizer
3
- from .client import truncate_sequence, truncate_and_tokenize_response_text
3
+ from helm.clients.client import truncate_sequence, truncate_and_tokenize_response_text
4
4
  from typing import List
5
5
  from helm.common.request import Request, GeneratedOutput, Token
6
6
 
@@ -5,7 +5,12 @@ import tempfile
5
5
  from helm.common.cache import BlackHoleCacheConfig, SqliteCacheConfig
6
6
  from helm.common.request import Request
7
7
 
8
- from .together_client import TogetherClient, TogetherChatClient, TogetherCompletionClient, TogetherClientError
8
+ from helm.clients.together_client import (
9
+ TogetherClient,
10
+ TogetherChatClient,
11
+ TogetherCompletionClient,
12
+ TogetherClientError,
13
+ )
9
14
 
10
15
 
11
16
  class TestTogetherClient:
@@ -1,12 +1,15 @@
1
1
  from copy import deepcopy
2
2
  from itertools import zip_longest
3
3
  import threading
4
- from typing import List, Dict, Any, Mapping, Optional, TypedDict, Union
4
+ from typing import Callable, List, Dict, Any, Mapping, Optional, TypedDict, Union
5
+ from typing_extensions import NotRequired
5
6
 
6
7
  import requests
7
8
  from retrying import retry
8
9
 
9
10
  from helm.common.cache import CacheConfig
11
+ from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
12
+ from helm.common.object_spec import get_class_by_name
10
13
  from helm.common.optional_dependencies import handle_module_not_found_error
11
14
  from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
12
15
  from helm.clients.client import CachingClient, truncate_sequence, cleanup_str
@@ -312,26 +315,65 @@ class TogetherRawChatRequest(TypedDict):
312
315
  logprobs: int
313
316
  echo: bool
314
317
  n: int
318
+ response_format: NotRequired[Dict[str, Any]]
315
319
 
316
320
 
317
321
  class TogetherChatClient(CachingClient):
318
322
  """Client that uses the Python Together library for chat models."""
319
323
 
320
- def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
324
+ def __init__(
325
+ self,
326
+ cache_config: CacheConfig,
327
+ api_key: Optional[str],
328
+ together_model: Optional[str] = None,
329
+ disable_logprobs: Optional[bool] = None,
330
+ output_processor: Optional[str] = None,
331
+ ):
321
332
  super().__init__(cache_config=cache_config)
322
333
  self._client = Together(api_key=api_key)
323
334
  self._together_model = together_model
335
+ self._disable_logprobs = bool(disable_logprobs)
336
+ # self.output_processor is actually a function, not a class
337
+
338
+ self.output_processor: Optional[Callable[[str], str]] = (
339
+ get_class_by_name(output_processor) if output_processor else None
340
+ )
324
341
 
325
342
  def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
343
+ request.validate()
344
+ messages: List[Dict[str, Any]]
326
345
  if request.messages:
327
346
  messages = request.messages
347
+ elif request.multimodal_prompt:
348
+ message_contents = []
349
+ for media_object in request.multimodal_prompt.media_objects:
350
+ if media_object.is_type(IMAGE_TYPE) and media_object.location:
351
+ assert media_object.location
352
+ if media_object.is_local_file:
353
+ from helm.common.images_utils import encode_base64
354
+
355
+ base64_image: str = encode_base64(media_object.location)
356
+ image_url = f"data:image/jpeg;base64,{base64_image}"
357
+ else:
358
+ image_url = media_object.location
359
+ message_contents.append({"type": "image_url", "image_url": {"url": image_url}})
360
+ elif media_object.is_type(TEXT_TYPE):
361
+ assert media_object.text
362
+ message_contents.append({"type": "text", "text": media_object.text})
363
+ else:
364
+ raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
365
+ messages = [{"role": "user", "content": message_contents}]
328
366
  else:
329
367
  messages = [{"role": "user", "content": request.prompt}]
330
368
  if self._together_model is not None:
331
369
  model = self._together_model
332
370
  else:
333
371
  model = request.model
334
- return {
372
+ if self._disable_logprobs:
373
+ logprobs = 0
374
+ else:
375
+ logprobs = min(request.top_k_per_token, 1)
376
+ raw_chat_request: TogetherRawChatRequest = {
335
377
  "messages": messages,
336
378
  "model": model,
337
379
  "max_tokens": request.max_tokens,
@@ -339,10 +381,16 @@ class TogetherChatClient(CachingClient):
339
381
  "temperature": request.temperature,
340
382
  "top_p": request.top_p,
341
383
  "top_k": request.top_k_per_token,
342
- "logprobs": min(request.top_k_per_token, 1),
384
+ "logprobs": logprobs,
343
385
  "echo": request.echo_prompt,
344
386
  "n": request.num_completions,
345
387
  }
388
+ if request.response_format and request.response_format.json_schema:
389
+ raw_chat_request["response_format"] = {
390
+ "type": "json_object",
391
+ "schema": request.response_format.json_schema,
392
+ }
393
+ return raw_chat_request
346
394
 
347
395
  def make_request(self, request: Request) -> RequestResult:
348
396
  raw_request = self.convert_to_raw_chat_request(request)
@@ -377,7 +425,10 @@ class TogetherChatClient(CachingClient):
377
425
  break
378
426
  tokens.append(Token(text=token_text, logprob=token_logprob or 0.0))
379
427
  assert choice.message.role == "assistant"
380
- generated_outputs.append(GeneratedOutput(text=choice.message.content, logprob=0.0, tokens=tokens))
428
+ output_text = choice.message.content
429
+ if self.output_processor:
430
+ output_text = self.output_processor(output_text)
431
+ generated_outputs.append(GeneratedOutput(text=output_text, logprob=0.0, tokens=tokens))
381
432
  return RequestResult(
382
433
  success=True,
383
434
  cached=cached,
@@ -404,16 +455,27 @@ class TogetherRawCompletionRequest(TypedDict):
404
455
  class TogetherCompletionClient(CachingClient):
405
456
  """Client that uses the Python Together library for text completion models."""
406
457
 
407
- def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
458
+ def __init__(
459
+ self,
460
+ cache_config: CacheConfig,
461
+ api_key: Optional[str],
462
+ together_model: Optional[str] = None,
463
+ disable_logprobs: Optional[bool] = None,
464
+ ):
408
465
  super().__init__(cache_config=cache_config)
409
466
  self._client = Together(api_key=api_key)
410
467
  self._together_model = together_model
468
+ self._disable_logprobs = bool(disable_logprobs)
411
469
 
412
470
  def convert_to_raw_completion_request(self, request: Request) -> TogetherRawCompletionRequest:
413
471
  if self._together_model is not None:
414
472
  model = self._together_model
415
473
  else:
416
474
  model = request.model
475
+ if self._disable_logprobs:
476
+ logprobs = 0
477
+ else:
478
+ logprobs = min(request.top_k_per_token, 1)
417
479
  return {
418
480
  "prompt": request.prompt,
419
481
  "model": model,
@@ -422,7 +484,7 @@ class TogetherCompletionClient(CachingClient):
422
484
  "temperature": request.temperature,
423
485
  "top_p": request.top_p,
424
486
  "top_k": request.top_k_per_token,
425
- "logprobs": min(request.top_k_per_token, 1),
487
+ "logprobs": logprobs,
426
488
  "echo": request.echo_prompt,
427
489
  "n": request.num_completions,
428
490
  }
@@ -0,0 +1,23 @@
1
+ from helm.clients.openai_client import OpenAIClient
2
+ from helm.common.cache import CacheConfig
3
+ from helm.tokenizers.tokenizer import Tokenizer
4
+
5
+
6
class UpstageChatClient(OpenAIClient):
    """Sends requests to an Upstage model using an OpenAI-compatible Chat API."""

    def __init__(
        self,
        tokenizer: Tokenizer,
        tokenizer_name: str,
        cache_config: CacheConfig,
        api_key: str,
    ):
        """Configure the inherited OpenAI client for the Upstage endpoint.

        The tokenizer, cache configuration and API key are forwarded
        unchanged; no organization ID is set and the base URL is fixed.
        """
        upstage_base_url = "https://api.upstage.ai/v1/solar"
        super().__init__(
            tokenizer=tokenizer,
            tokenizer_name=tokenizer_name,
            cache_config=cache_config,
            api_key=api_key,
            org_id=None,
            base_url=upstage_base_url,
        )
@@ -4,6 +4,7 @@ from threading import Lock
4
4
  from typing import Any, Dict, Mapping, Optional, List, Union
5
5
 
6
6
  from helm.common.cache import CacheConfig
7
+ from helm.common.multimodal_request_utils import get_contents_as_bytes
7
8
  from helm.common.media_object import TEXT_TYPE
8
9
  from helm.common.optional_dependencies import handle_module_not_found_error
9
10
  from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, ErrorFlags
@@ -12,7 +13,14 @@ from helm.clients.client import CachingClient, truncate_sequence, generate_uid_f
12
13
  try:
13
14
  import vertexai
14
15
  from vertexai.language_models import TextGenerationModel, TextGenerationResponse # PaLM2
15
- from vertexai.preview.generative_models import GenerativeModel, GenerationResponse, Candidate, Part, Image # Gemini
16
+ from vertexai.preview.generative_models import (
17
+ GenerativeModel,
18
+ GenerationResponse,
19
+ Candidate,
20
+ Content,
21
+ Part,
22
+ Image,
23
+ ) # Gemini
16
24
  from google.cloud.aiplatform_v1beta1.types import SafetySetting, HarmCategory
17
25
  except ModuleNotFoundError as e:
18
26
  handle_module_not_found_error(e, ["google"])
@@ -48,17 +56,16 @@ def _get_safety_settings_for_preset(
48
56
  raise ValueError(f"Unknown safety_settings_preset: {safety_settings_preset}")
49
57
 
50
58
 
51
- def _get_model_name_for_request(request: Request) -> str:
52
- # We have to strip "-safety-" suffixes from model names because they are not part of the Vertex AI model name
53
- # TODO: Clean up this hack
54
- return request.model_engine.split("-safety-")[0]
55
-
56
-
57
59
  class VertexAIClient(CachingClient, ABC):
58
60
  """Client for Vertex AI models"""
59
61
 
60
62
  def __init__(
61
- self, cache_config: CacheConfig, project_id: str, location: str, safety_settings_preset: Optional[str] = None
63
+ self,
64
+ cache_config: CacheConfig,
65
+ project_id: str,
66
+ location: str,
67
+ safety_settings_preset: Optional[str] = None,
68
+ vertexai_model: Optional[str] = None,
62
69
  ) -> None:
63
70
  super().__init__(cache_config=cache_config)
64
71
  self.project_id = project_id
@@ -67,8 +74,15 @@ class VertexAIClient(CachingClient, ABC):
67
74
  self.safety_settings_preset = safety_settings_preset
68
75
  self.safety_settings = _get_safety_settings_for_preset(safety_settings_preset)
69
76
 
77
+ self.vertexai_model = vertexai_model
78
+
70
79
  vertexai.init(project=self.project_id, location=self.location)
71
80
 
81
+ def _get_model_name_for_request(self, request: Request) -> str:
82
+ if self.vertexai_model is not None:
83
+ return self.vertexai_model
84
+ return request.model_engine
85
+
72
86
  def make_cache_key_with_safety_settings_preset(self, raw_request: Mapping, request: Request) -> Mapping:
73
87
  """Construct the key for the cache using the raw request.
74
88
 
@@ -111,7 +125,7 @@ class VertexAITextClient(VertexAIClient):
111
125
  }
112
126
 
113
127
  completions: List[GeneratedOutput] = []
114
- model_name: str = _get_model_name_for_request(request)
128
+ model_name: str = self._get_model_name_for_request(request)
115
129
 
116
130
  try:
117
131
 
@@ -193,12 +207,20 @@ class VertexAIChatClient(VertexAIClient):
193
207
 
194
208
  def make_request(self, request: Request) -> RequestResult:
195
209
  """Make a request"""
196
- contents: str = request.prompt
210
+ contents = [request.prompt]
197
211
 
198
212
  # For the multimodal case, build up the content with the media objects of `request.multimodal_prompt`
199
213
  if request.multimodal_prompt is not None:
200
214
  return self._make_multimodal_request(request)
201
215
 
216
+ if request.messages is not None:
217
+ contents = []
218
+ role_mapping = {"user": "user", "assistant": "model"}
219
+ for msg in request.messages:
220
+ contents.append(
221
+ Content(role=role_mapping.get(msg["role"], "user"), parts=[Part.from_text(msg["content"])])
222
+ )
223
+
202
224
  parameters = {
203
225
  "temperature": request.temperature,
204
226
  "max_output_tokens": request.max_tokens,
@@ -217,7 +239,7 @@ class VertexAIChatClient(VertexAIClient):
217
239
  }
218
240
 
219
241
  completions: List[GeneratedOutput] = []
220
- model_name: str = _get_model_name_for_request(request)
242
+ model_name: str = self._get_model_name_for_request(request)
221
243
  model = self.get_model(model_name)
222
244
 
223
245
  try:
@@ -263,7 +285,7 @@ class VertexAIChatClient(VertexAIClient):
263
285
  cache_key = self.make_cache_key_with_safety_settings_preset(
264
286
  {
265
287
  "model_name": model_name,
266
- "prompt": request.prompt,
288
+ "prompt": request.messages or request.prompt,
267
289
  **parameters,
268
290
  },
269
291
  request,
@@ -338,6 +360,10 @@ class VertexAIChatClient(VertexAIClient):
338
360
  for media_object in request.multimodal_prompt.media_objects:
339
361
  if media_object.is_type("image") and media_object.location:
340
362
  contents.append(Part.from_image(Image.load_from_file(media_object.location)))
363
+ elif media_object.is_type("audio") and media_object.location:
364
+ contents.append(
365
+ Part.from_data(get_contents_as_bytes(media_object.location), mime_type=media_object.content_type)
366
+ )
341
367
  elif media_object.is_type(TEXT_TYPE):
342
368
  if media_object.text is None:
343
369
  raise ValueError("MediaObject of text type has missing text field value")
@@ -355,7 +381,7 @@ class VertexAIChatClient(VertexAIClient):
355
381
  }
356
382
 
357
383
  completions: List[GeneratedOutput] = []
358
- model_name: str = _get_model_name_for_request(request)
384
+ model_name: str = self._get_model_name_for_request(request)
359
385
  model = self.get_model(model_name)
360
386
 
361
387
  request_time = 0
@@ -1,2 +1,2 @@
1
- from .src.flamingo import Flamingo
2
- from .src.factory import create_model_and_transforms
1
+ from helm.clients.vision_language.open_flamingo.src.flamingo import Flamingo
2
+ from helm.clients.vision_language.open_flamingo.src.factory import create_model_and_transforms
@@ -7,9 +7,9 @@ from typing import Optional
7
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
8
 
9
9
  from helm.common.general import handle_module_not_found_error
10
- from .flamingo import Flamingo
11
- from .flamingo_lm import FlamingoLMMixin
12
- from .utils import extend_instance
10
+ from helm.clients.vision_language.open_flamingo.src.flamingo import Flamingo
11
+ from helm.clients.vision_language.open_flamingo.src.flamingo_lm import FlamingoLMMixin
12
+ from helm.clients.vision_language.open_flamingo.src.utils import extend_instance
13
13
 
14
14
 
15
15
  def create_model_and_transforms(
@@ -5,7 +5,7 @@ Source: https://github.com/mlfoundations/open_flamingo
5
5
  import torch
6
6
  from einops import rearrange
7
7
  from torch import nn
8
- from .helpers import PerceiverResampler
8
+ from helm.clients.vision_language.open_flamingo.src.helpers import PerceiverResampler
9
9
  from torch.distributed.fsdp.wrap import (
10
10
  enable_wrap,
11
11
  wrap,
@@ -15,7 +15,7 @@ from torch.distributed.fsdp import (
15
15
  FullyShardedDataParallel as FSDP,
16
16
  )
17
17
 
18
- from .utils import apply_with_stopping_condition
18
+ from helm.clients.vision_language.open_flamingo.src.utils import apply_with_stopping_condition
19
19
 
20
20
 
21
21
  class Flamingo(nn.Module):
@@ -3,8 +3,8 @@ Source: https://github.com/mlfoundations/open_flamingo
3
3
  """
4
4
 
5
5
  import torch.nn as nn
6
- from .helpers import GatedCrossAttentionBlock
7
- from .utils import getattr_recursive, setattr_recursive
6
+ from helm.clients.vision_language.open_flamingo.src.helpers import GatedCrossAttentionBlock
7
+ from helm.clients.vision_language.open_flamingo.src.utils import getattr_recursive, setattr_recursive
8
8
 
9
9
 
10
10
  class FlamingoLayer(nn.Module):