crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -1,461 +0,0 @@
1
- import os
2
- from dataclasses import replace
3
- from pathlib import Path
4
- from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional
5
-
6
- from retrying import Attempt, RetryError
7
-
8
- from helm.benchmark.model_deployment_registry import get_model_deployment
9
- from helm.benchmark.tokenizer_config_registry import get_tokenizer_config
10
- from helm.common.cache import CacheConfig, MongoCacheConfig, SqliteCacheConfig
11
- from helm.common.hierarchical_logger import hlog
12
- from helm.common.object_spec import create_object, inject_object_spec_args
13
- from helm.common.request import Request, RequestResult
14
- from helm.common.tokenization_request import (
15
- DecodeRequest,
16
- DecodeRequestResult,
17
- TokenizationRequest,
18
- TokenizationRequestResult,
19
- )
20
- from helm.proxy.clients.client import Client
21
- from helm.proxy.critique.critique_client import CritiqueClient
22
- from helm.proxy.clients.toxicity_classifier_client import ToxicityClassifierClient
23
- from helm.proxy.retry import NonRetriableException, retry_request
24
- from helm.proxy.tokenizers.tokenizer import Tokenizer
25
- from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
26
-
27
- from .http_model_client import HTTPModelClient
28
-
29
- if TYPE_CHECKING:
30
- import helm.proxy.clients.huggingface_client
31
-
32
-
33
- class AuthenticationError(NonRetriableException):
34
- pass
35
-
36
-
37
- class AutoClient(Client):
38
- """Automatically dispatch to the proper `Client` based on the organization.
39
-
40
- The modules for each client are lazily imported when the respective client is created.
41
- This greatly speeds up the import time of this module, and allows the client modules to
42
- use optional dependencies."""
43
-
44
- def __init__(self, credentials: Mapping[str, Any], cache_path: str, mongo_uri: str = ""):
45
- self.credentials = credentials
46
- self.cache_path = cache_path
47
- self.mongo_uri = mongo_uri
48
- self.clients: Dict[str, Client] = {}
49
- self.tokenizers: Dict[str, Tokenizer] = {}
50
- # self._huggingface_client is lazily instantiated by get_huggingface_client()
51
- self._huggingface_client: Optional["helm.proxy.clients.huggingface_client.HuggingFaceClient"] = None
52
- # self._critique_client is lazily instantiated by get_critique_client()
53
- self._critique_client: Optional[CritiqueClient] = None
54
- hlog(f"AutoClient: cache_path = {cache_path}")
55
- hlog(f"AutoClient: mongo_uri = {mongo_uri}")
56
-
57
- def _build_cache_config(self, organization: str) -> CacheConfig:
58
- if self.mongo_uri:
59
- return MongoCacheConfig(self.mongo_uri, collection_name=organization)
60
-
61
- client_cache_path: str = os.path.join(self.cache_path, f"{organization}.sqlite")
62
- # TODO: Allow setting CacheConfig.follower_cache_path from a command line flag.
63
- return SqliteCacheConfig(client_cache_path)
64
-
65
- def _get_client(self, model: str) -> Client:
66
- """Return a client based on the model, creating it if necessary."""
67
- client: Optional[Client] = self.clients.get(model)
68
-
69
- if client is None:
70
- organization: str = model.split("/")[0]
71
- cache_config: CacheConfig = self._build_cache_config(organization)
72
- tokenizer: Tokenizer = self._get_tokenizer(organization)
73
-
74
- # TODO: Migrate all clients to use model deployments
75
- model_deployment = get_model_deployment(model)
76
- if model_deployment:
77
-
78
- def provide_api_key():
79
- if "deployments" not in self.credentials:
80
- raise AuthenticationError("Could not find key 'deployments' in credentials.conf")
81
- deployment_api_keys = self.credentials["deployments"]
82
- if model not in deployment_api_keys:
83
- raise AuthenticationError(
84
- f"Could not find key '{model}' under key 'deployments' in credentials.conf"
85
- )
86
- return deployment_api_keys[model]
87
-
88
- # Perform dependency injection to fill in remaining arguments.
89
- # Dependency injection is needed here for these reasons:
90
- #
91
- # 1. Different clients have different parameters. Dependency injection provides arguments
92
- # that match the parameters of the client.
93
- # 2. Some arguments, such as the tokenizer, are not static data objects that can be
94
- # in the users configuration file. Instead, they have to be constructed dynamically at
95
- # runtime.
96
- # 3. The providers must be lazily-evaluated, because eager evaluation can result in an
97
- # exception. For instance, some clients do not require an API key, so trying to fetch
98
- # the API key from configuration eagerly will result in an exception because the user
99
- # will not have configured an API key.
100
- client_spec = inject_object_spec_args(
101
- model_deployment.client_spec,
102
- constant_bindings={"cache_config": cache_config},
103
- provider_bindings={"api_key": provide_api_key},
104
- )
105
- client = create_object(client_spec)
106
- elif organization == "neurips":
107
- client = HTTPModelClient(tokenizer=tokenizer, cache_config=cache_config)
108
- elif organization == "openai":
109
- from helm.proxy.clients.openai_client import OpenAIClient
110
-
111
- org_id = self.credentials.get("openaiOrgId", None)
112
- api_key = self.credentials.get("openaiApiKey", None)
113
- client = OpenAIClient(
114
- tokenizer=tokenizer,
115
- cache_config=cache_config,
116
- api_key=api_key,
117
- org_id=org_id,
118
- )
119
- elif organization == "AlephAlpha":
120
- from helm.proxy.clients.aleph_alpha_client import AlephAlphaClient
121
-
122
- client = AlephAlphaClient(
123
- tokenizer=tokenizer,
124
- api_key=self.credentials["alephAlphaKey"],
125
- cache_config=cache_config,
126
- )
127
- elif organization == "ai21":
128
- from helm.proxy.clients.ai21_client import AI21Client
129
-
130
- client = AI21Client(
131
- tokenizer=tokenizer,
132
- api_key=self.credentials["ai21ApiKey"],
133
- cache_config=cache_config,
134
- )
135
- elif organization == "cohere":
136
- from helm.proxy.clients.cohere_client import CohereClient
137
-
138
- client = CohereClient(
139
- tokenizer=tokenizer,
140
- api_key=self.credentials["cohereApiKey"],
141
- cache_config=cache_config,
142
- )
143
- elif organization == "gooseai":
144
- from helm.proxy.clients.goose_ai_client import GooseAIClient
145
-
146
- org_id = self.credentials.get("gooseaiOrgId", None)
147
- client = GooseAIClient(
148
- tokenizer=tokenizer,
149
- api_key=self.credentials["gooseaiApiKey"],
150
- cache_config=cache_config,
151
- org_id=org_id,
152
- )
153
- elif organization == "huggingface":
154
- from helm.proxy.clients.huggingface_client import HuggingFaceClient
155
-
156
- client = HuggingFaceClient(tokenizer=tokenizer, cache_config=cache_config)
157
- elif organization == "anthropic":
158
- from helm.proxy.clients.anthropic_client import AnthropicClient
159
-
160
- client = AnthropicClient(
161
- api_key=self.credentials.get("anthropicApiKey", None),
162
- tokenizer=tokenizer,
163
- cache_config=cache_config,
164
- )
165
- elif organization == "microsoft":
166
- from helm.proxy.clients.microsoft_client import MicrosoftClient
167
-
168
- org_id = self.credentials.get("microsoftOrgId", None)
169
- lock_file_path: str = os.path.join(self.cache_path, f"{organization}.lock")
170
- client = MicrosoftClient(
171
- api_key=self.credentials.get("microsoftApiKey", None),
172
- tokenizer=tokenizer,
173
- lock_file_path=lock_file_path,
174
- cache_config=cache_config,
175
- org_id=org_id,
176
- )
177
- elif organization == "google":
178
- from helm.proxy.clients.google_client import GoogleClient
179
-
180
- client = GoogleClient(
181
- tokenizer=tokenizer,
182
- cache_config=cache_config,
183
- )
184
- elif organization in [
185
- "together",
186
- "databricks",
187
- "eleutherai",
188
- "lmsys",
189
- "meta",
190
- "mistralai",
191
- "mosaicml",
192
- "stabilityai",
193
- "stanford",
194
- "tiiuae",
195
- ]:
196
- from helm.proxy.clients.together_client import TogetherClient
197
-
198
- client = TogetherClient(
199
- api_key=self.credentials.get("togetherApiKey", None),
200
- tokenizer=tokenizer,
201
- cache_config=cache_config,
202
- )
203
- elif organization == "simple":
204
- from helm.proxy.clients.simple_client import SimpleClient
205
-
206
- client = SimpleClient(tokenizer=tokenizer, cache_config=cache_config)
207
- elif organization == "writer":
208
- from helm.proxy.clients.palmyra_client import PalmyraClient
209
-
210
- client = PalmyraClient(
211
- api_key=self.credentials["writerApiKey"],
212
- tokenizer=tokenizer,
213
- cache_config=cache_config,
214
- )
215
- elif organization == "nvidia":
216
- from helm.proxy.clients.megatron_client import MegatronClient
217
-
218
- client = MegatronClient(tokenizer=tokenizer, cache_config=cache_config)
219
-
220
- elif organization == "lightningai":
221
- from helm.proxy.clients.lit_gpt_client import LitGPTClient
222
-
223
- client = LitGPTClient(
224
- tokenizer=tokenizer,
225
- cache_config=cache_config,
226
- checkpoint_dir=Path(os.environ.get("LIT_GPT_CHECKPOINT_DIR", "")),
227
- precision=os.environ.get("LIT_GPT_PRECISION", "bf16-true"),
228
- )
229
- elif organization == "HuggingFaceM4":
230
- from helm.proxy.clients.vision_language.idefics_client import IDEFICSClient
231
-
232
- client = IDEFICSClient(tokenizer=tokenizer, cache_config=cache_config)
233
- else:
234
- raise ValueError(f"Could not find client for model: {model}")
235
- self.clients[model] = client
236
- return client
237
-
238
- def make_request(self, request: Request) -> RequestResult:
239
- """
240
- Dispatch based on the name of the model (e.g., openai/davinci).
241
- Retries if request fails.
242
- """
243
-
244
- # TODO: need to revisit this because this swallows up any exceptions that are raised.
245
- @retry_request
246
- def make_request_with_retry(client: Client, request: Request) -> RequestResult:
247
- return client.make_request(request)
248
-
249
- client: Client = self._get_client(request.model)
250
-
251
- try:
252
- return make_request_with_retry(client=client, request=request)
253
- except RetryError as e:
254
- last_attempt: Attempt = e.last_attempt
255
- retry_error: str = (
256
- f"Failed to make request to {request.model} after retrying {last_attempt.attempt_number} times"
257
- )
258
- hlog(retry_error)
259
-
260
- # Notify our user that we failed to make the request even after retrying.
261
- return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
262
-
263
- def _get_tokenizer(self, tokenizer_name: str) -> Tokenizer:
264
- # First try to find the tokenizer in the cache
265
- tokenizer: Optional[Tokenizer] = self.tokenizers.get(tokenizer_name)
266
- if tokenizer is not None:
267
- return tokenizer
268
-
269
- # Otherwise, create the tokenizer
270
- organization: str = tokenizer_name.split("/")[0]
271
- cache_config: CacheConfig = self._build_cache_config(organization)
272
-
273
- # TODO: Migrate all clients to use tokenizer configs
274
- tokenizer_config = get_tokenizer_config(tokenizer_name)
275
- if tokenizer_config:
276
- tokenizer_spec = inject_object_spec_args(
277
- tokenizer_config.tokenizer_spec, constant_bindings={"cache_config": cache_config}
278
- )
279
- return create_object(tokenizer_spec)
280
- elif organization in [
281
- "gooseai",
282
- "huggingface",
283
- "microsoft",
284
- "google",
285
- "writer", # Palmyra
286
- "nvidia",
287
- "EleutherAI",
288
- "facebook",
289
- "meta-llama",
290
- "hf-internal-testing",
291
- "mistralai",
292
- "HuggingFaceM4",
293
- # Together
294
- "together",
295
- "databricks",
296
- "eleutherai",
297
- "lmsys",
298
- "meta",
299
- "mosaicml",
300
- "stabilityai",
301
- "stanford",
302
- "tiiuae",
303
- "bigcode",
304
- "bigscience",
305
- ]:
306
- from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
307
-
308
- tokenizer = HuggingFaceTokenizer(cache_config=cache_config)
309
- elif organization == "neurips":
310
- from helm.proxy.tokenizers.http_model_tokenizer import HTTPModelTokenizer
311
-
312
- tokenizer = HTTPModelTokenizer(cache_config=cache_config)
313
- elif organization == "openai":
314
- from helm.proxy.tokenizers.tiktoken_tokenizer import TiktokenTokenizer
315
-
316
- tokenizer = TiktokenTokenizer(cache_config=cache_config)
317
- elif organization == "AlephAlpha":
318
- from helm.proxy.tokenizers.aleph_alpha_tokenizer import AlephAlphaTokenizer
319
-
320
- tokenizer = AlephAlphaTokenizer(api_key=self.credentials["alephAlphaKey"], cache_config=cache_config)
321
- elif organization == "ai21":
322
- from helm.proxy.tokenizers.ai21_tokenizer import AI21Tokenizer
323
-
324
- tokenizer = AI21Tokenizer(api_key=self.credentials["ai21ApiKey"], cache_config=cache_config)
325
- elif organization == "cohere":
326
- from helm.proxy.tokenizers.cohere_tokenizer import CohereTokenizer
327
-
328
- tokenizer = CohereTokenizer(api_key=self.credentials["cohereApiKey"], cache_config=cache_config)
329
- elif organization == "anthropic":
330
- from helm.proxy.tokenizers.anthropic_tokenizer import AnthropicTokenizer
331
-
332
- tokenizer = AnthropicTokenizer(cache_config=cache_config)
333
- elif organization == "simple":
334
- from helm.proxy.tokenizers.simple_tokenizer import SimpleTokenizer
335
-
336
- tokenizer = SimpleTokenizer()
337
- elif organization == "lightningai":
338
- from helm.proxy.tokenizers.lit_gpt_tokenizer import LitGPTTokenizer
339
-
340
- tokenizer = LitGPTTokenizer(
341
- cache_config=cache_config,
342
- checkpoint_dir=Path(os.environ.get("LIT_GPT_CHECKPOINT_DIR", "")),
343
- )
344
- elif organization == "TsinghuaKEG":
345
- from helm.proxy.tokenizers.ice_tokenizer import ICETokenizer
346
-
347
- tokenizer = ICETokenizer(cache_config=cache_config)
348
- elif organization == "Yandex":
349
- from helm.proxy.tokenizers.yalm_tokenizer import YaLMTokenizer
350
-
351
- tokenizer = YaLMTokenizer(cache_config=cache_config)
352
-
353
- if tokenizer is None:
354
- raise ValueError(f"Could not find tokenizer for model: {tokenizer_name}")
355
-
356
- # Cache the tokenizer
357
- self.tokenizers[tokenizer_name] = tokenizer
358
-
359
- return tokenizer
360
-
361
- def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
362
- """Tokenizes based on the name of the tokenizer (e.g., huggingface/gpt2)."""
363
-
364
- def tokenize_with_retry(tokenizer: Tokenizer, request: TokenizationRequest) -> TokenizationRequestResult:
365
- return tokenizer.tokenize(request)
366
-
367
- tokenizer: Tokenizer = self._get_tokenizer(request.tokenizer)
368
-
369
- try:
370
- return tokenize_with_retry(tokenizer=tokenizer, request=request)
371
- except RetryError as e:
372
- last_attempt: Attempt = e.last_attempt
373
- retry_error: str = f"Failed to tokenize after retrying {last_attempt.attempt_number} times"
374
- hlog(retry_error)
375
- return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
376
-
377
- def decode(self, request: DecodeRequest) -> DecodeRequestResult:
378
- """Decodes based on the the name of the tokenizer (e.g., huggingface/gpt2)."""
379
-
380
- def decode_with_retry(tokenizer: Tokenizer, request: DecodeRequest) -> DecodeRequestResult:
381
- return tokenizer.decode(request)
382
-
383
- tokenizer: Tokenizer = self._get_tokenizer(request.tokenizer)
384
-
385
- try:
386
- return decode_with_retry(tokenizer=tokenizer, request=request)
387
- except RetryError as e:
388
- last_attempt: Attempt = e.last_attempt
389
- retry_error: str = f"Failed to decode after retrying {last_attempt.attempt_number} times"
390
- hlog(retry_error)
391
- return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
392
-
393
- def get_toxicity_classifier_client(self) -> ToxicityClassifierClient:
394
- """Get the toxicity classifier client. We currently only support Perspective API."""
395
- from helm.proxy.clients.perspective_api_client import PerspectiveAPIClient
396
-
397
- cache_config: CacheConfig = self._build_cache_config("perspectiveapi")
398
- return PerspectiveAPIClient(self.credentials.get("perspectiveApiKey", ""), cache_config)
399
-
400
- def get_critique_client(self) -> CritiqueClient:
401
- """Get the critique client."""
402
- if self._critique_client:
403
- return self._critique_client
404
- critique_type = self.credentials.get("critiqueType")
405
- if critique_type == "random":
406
- from helm.proxy.critique.critique_client import RandomCritiqueClient
407
-
408
- self._critique_client = RandomCritiqueClient()
409
- elif critique_type == "mturk":
410
- from helm.proxy.critique.mechanical_turk_critique_client import (
411
- MechanicalTurkCritiqueClient,
412
- )
413
-
414
- self._critique_client = MechanicalTurkCritiqueClient()
415
- elif critique_type == "surgeai":
416
- from helm.proxy.critique.surge_ai_critique_client import (
417
- SurgeAICritiqueClient,
418
- )
419
-
420
- surgeai_credentials = self.credentials.get("surgeaiApiKey")
421
- if not surgeai_credentials:
422
- raise ValueError("surgeaiApiKey credentials are required for SurgeAICritiqueClient")
423
- self._critique_client = SurgeAICritiqueClient(surgeai_credentials, self._build_cache_config("surgeai"))
424
- elif critique_type == "model":
425
- from helm.proxy.critique.model_critique_client import ModelCritiqueClient
426
-
427
- model_name: Optional[str] = self.credentials.get("critiqueModelName")
428
- if model_name is None:
429
- raise ValueError("critiqueModelName is required for ModelCritiqueClient")
430
- client: Client = self._get_client(model_name)
431
- self._critique_client = ModelCritiqueClient(client, model_name)
432
- elif critique_type == "scale":
433
- from helm.proxy.critique.scale_critique_client import ScaleCritiqueClient
434
-
435
- scale_credentials = self.credentials.get("scaleApiKey")
436
- scale_project = self.credentials.get("scaleProject", None)
437
- if not scale_project:
438
- raise ValueError("scaleProject is required for ScaleCritiqueClient.")
439
- if not scale_credentials:
440
- raise ValueError("scaleApiKey is required for ScaleCritiqueClient")
441
- self._critique_client = ScaleCritiqueClient(
442
- scale_credentials, self._build_cache_config("scale"), scale_project
443
- )
444
- else:
445
- raise ValueError(
446
- "CritiqueClient is not configured; set critiqueType to 'mturk',"
447
- "'mturk-sandbox', 'surgeai', 'scale' or 'random'"
448
- )
449
- return self._critique_client
450
-
451
- def get_huggingface_client(self) -> "helm.proxy.clients.huggingface_client.HuggingFaceClient":
452
- """Get the Hugging Face client."""
453
- from helm.proxy.clients.huggingface_client import HuggingFaceClient
454
-
455
- if self._huggingface_client:
456
- assert isinstance(self._huggingface_client, HuggingFaceClient)
457
- return self._huggingface_client
458
- cache_config = self._build_cache_config("huggingface")
459
- tokenizer = HuggingFaceTokenizer(cache_config)
460
- self._huggingface_client = HuggingFaceClient(tokenizer=tokenizer, cache_config=cache_config)
461
- return self._huggingface_client
@@ -1,100 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import openai as gooseai
4
-
5
- from helm.common.cache import CacheConfig
6
- from helm.common.request import (
7
- wrap_request_time,
8
- EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
9
- Request,
10
- RequestResult,
11
- Sequence,
12
- Token,
13
- )
14
- from helm.proxy.tokenizers.tokenizer import Tokenizer
15
- from .client import CachingClient, truncate_sequence
16
- from .openai_client import ORIGINAL_COMPLETION_ATTRIBUTES
17
-
18
-
19
- class GooseAIClient(CachingClient):
20
- """
21
- GooseAI API Client
22
- - How to use the API: https://goose.ai/docs/api
23
- - Supported models: https://goose.ai/docs/models
24
- """
25
-
26
- def __init__(self, api_key: str, tokenizer: Tokenizer, cache_config: CacheConfig, org_id: Optional[str] = None):
27
- super().__init__(cache_config=cache_config, tokenizer=tokenizer)
28
- self.org_id: Optional[str] = org_id
29
- self.api_key: str = api_key
30
- self.api_base: str = "https://api.goose.ai/v1"
31
-
32
- def make_request(self, request: Request) -> RequestResult:
33
- """
34
- Request parameters for GooseAI API documented here: https://goose.ai/docs/api/completions
35
- The only OpenAI API parameter not supported is `best_of`.
36
- """
37
- # Embedding not supported for this model
38
- if request.embedding:
39
- return EMBEDDING_UNAVAILABLE_REQUEST_RESULT
40
-
41
- raw_request = {
42
- "engine": request.model_engine,
43
- "prompt": request.prompt,
44
- "temperature": request.temperature,
45
- "n": request.num_completions,
46
- "max_tokens": request.max_tokens,
47
- "logprobs": request.top_k_per_token,
48
- "stop": request.stop_sequences or None, # API doesn't like empty list
49
- "top_p": request.top_p,
50
- "presence_penalty": request.presence_penalty,
51
- "frequency_penalty": request.frequency_penalty,
52
- "echo": request.echo_prompt,
53
- }
54
-
55
- try:
56
-
57
- def do_it():
58
- # Following https://beta.openai.com/docs/api-reference/authentication
59
- # `organization` can be set to None.
60
- gooseai.organization = self.org_id
61
- gooseai.api_key = self.api_key
62
- gooseai.api_base = self.api_base
63
- gooseai.api_resources.completion.Completion.__bases__ = ORIGINAL_COMPLETION_ATTRIBUTES
64
- return gooseai.Completion.create(**raw_request)
65
-
66
- cache_key = CachingClient.make_cache_key(raw_request, request)
67
- response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
68
- except gooseai.error.OpenAIError as e:
69
- error: str = f"OpenAI (GooseAI API) error: {e}"
70
- return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
71
-
72
- completions: List[Sequence] = []
73
- for raw_completion in response["choices"]:
74
- sequence_logprob = 0
75
- tokens: List[Token] = []
76
-
77
- raw_data = raw_completion["logprobs"]
78
- for text, logprob, top_logprobs in zip(
79
- raw_data["tokens"], raw_data["token_logprobs"], raw_data["top_logprobs"]
80
- ):
81
- tokens.append(Token(text=text, logprob=logprob or 0, top_logprobs=dict(top_logprobs or {})))
82
- sequence_logprob += logprob or 0
83
-
84
- completion = Sequence(
85
- text=raw_completion["text"],
86
- logprob=sequence_logprob,
87
- tokens=tokens,
88
- finish_reason={"reason": raw_completion["finish_reason"]},
89
- )
90
- completion = truncate_sequence(completion, request)
91
- completions.append(completion)
92
-
93
- return RequestResult(
94
- success=True,
95
- cached=cached,
96
- request_time=response["request_time"],
97
- request_datetime=response.get("request_datetime"),
98
- completions=completions,
99
- embedding=[],
100
- )