crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
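Most of the renames above move the client and tokenizer implementations out of the helm.proxy package into the new top-level helm.clients and helm.tokenizers packages. For downstream code that imports these classes directly, a minimal compatibility sketch (TogetherClient is just one example; the module paths are taken from the rename list above):

    # crfm-helm 0.5.0 moved helm/proxy/clients -> helm/clients and
    # helm/proxy/tokenizers -> helm/tokenizers (see the rename list above).
    try:
        from helm.clients.together_client import TogetherClient  # 0.5.0 path
    except ImportError:
        from helm.proxy.clients.together_client import TogetherClient  # 0.3.0 path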
helm/config/model_deployments.yaml
@@ -0,0 +1,1942 @@
+ # This file defines all the model deployments that are supported by the Helm API.
+ # Some models have several deployments, each with different parameters.
+
+ # If you want to add a new deployment, you can technically do it here but we recommend
+ # you to do it in prod_env/model_deployments.yaml instead.
+
+ # Follow the template of this file to add a new deployment. You can copy paste this to get started:
+ # # This file defines all the model deployments that you do not want to be public.
+ # model_deployments: [] # Leave empty to disable private model deployments
+
+ model_deployments:
+   - name: simple/model1
+     model_name: simple/model1
+     tokenizer_name: simple/tokenizer1
+     max_sequence_length: 2048
+     client_spec:
+       class_name: "helm.clients.simple_client.SimpleClient"
+
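Each deployment entry above pairs a model_name with a tokenizer_name, a context window (max_sequence_length), and a client_spec whose class_name names the Python class that serves requests. A minimal sketch of how such a class_name string can be resolved by dynamic import; load_client_class is a hypothetical helper, not HELM's API (HELM resolves these specs through its own ObjectSpec machinery):

    import importlib

    def load_client_class(class_name: str) -> type:
        # Hypothetical helper: resolve a client_spec class_name such as
        # "helm.clients.simple_client.SimpleClient" to the class it names.
        module_name, _, attr = class_name.rpartition(".")
        return getattr(importlib.import_module(module_name), attr)

    # Example with the first deployment above:
    client_cls = load_client_class("helm.clients.simple_client.SimpleClient")
    print(client_cls.__name__)  # SimpleClient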
+   # Adobe
+   - name: adobe/giga-gan
+     model_name: adobe/giga-gan
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.adobe_vision_client.AdobeVisionClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+   # AI21 Labs
+
+   # J1 models are Deprecated by AI21 Labs
+   # API returns: Detail: Jurassic J1 models are deprecated
+   - name: ai21/j1-jumbo
+     deprecated: true
+     model_name: ai21/j1-jumbo
+     tokenizer_name: ai21/j1
+     max_sequence_length: 2047
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   - name: ai21/j1-large
+     deprecated: true
+     model_name: ai21/j1-large
+     tokenizer_name: ai21/j1
+     max_sequence_length: 2047
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   - name: ai21/j1-grande
+     deprecated: true
+     model_name: ai21/j1-grande
+     tokenizer_name: ai21/j1
+     max_sequence_length: 2047
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   - name: ai21/j1-grande-v2-beta
+     deprecated: true
+     model_name: ai21/j1-grande-v2-beta
+     tokenizer_name: ai21/j1
+     max_sequence_length: 2047
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   - name: ai21/j2-jumbo
+     model_name: ai21/j2-jumbo
+     tokenizer_name: ai21/j1
+     max_sequence_length: 6000
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   - name: ai21/j2-large
+     model_name: ai21/j2-large
+     tokenizer_name: ai21/j1
+     max_sequence_length: 2047
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   - name: ai21/j2-grande
+     model_name: ai21/j2-grande
+     tokenizer_name: ai21/j1
+     max_sequence_length: 2047
+     client_spec:
+       class_name: "helm.clients.ai21_client.AI21Client"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
+
+   # Aleph Alpha
+   - name: AlephAlpha/luminous-base
+     model_name: AlephAlpha/luminous-base
+     tokenizer_name: AlephAlpha/luminous-base
+     max_sequence_length: 2048
+     client_spec:
+       class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
+
+   - name: AlephAlpha/luminous-extended
+     model_name: AlephAlpha/luminous-extended
+     tokenizer_name: AlephAlpha/luminous-extended
+     max_sequence_length: 2048
+     client_spec:
+       class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
+
+   - name: AlephAlpha/luminous-supreme
+     model_name: AlephAlpha/luminous-supreme
+     tokenizer_name: AlephAlpha/luminous-supreme
+     max_sequence_length: 2048
+     client_spec:
+       class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
+
+   # TODO: Add luminous-world once it is released
+
+   - name: AlephAlpha/m-vader
+     model_name: AlephAlpha/m-vader
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.aleph_alpha_image_generation_client.AlephAlphaImageGenerationClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+
+   # Amazon
+   - name: amazon/titan-text-lite-v1
+     model_name: amazon/titan-text-lite-v1
+     tokenizer_name: huggingface/gpt2
+     max_sequence_length: 4000
+     client_spec:
+       class_name: "helm.clients.bedrock_client.BedrockTitanClient"
+
+   - name: amazon/titan-tg1-large
+     model_name: amazon/titan-tg1-large
+     tokenizer_name: huggingface/gpt2
+     max_sequence_length: 8000
+     client_spec:
+       class_name: "helm.clients.bedrock_client.BedrockTitanClient"
+
+   - name: amazon/titan-text-express-v1
+     model_name: amazon/titan-text-express-v1
+     tokenizer_name: huggingface/gpt2
+     max_sequence_length: 8000
+     client_spec:
+       class_name: "helm.clients.bedrock_client.BedrockTitanClient"
+
+
+   # Anthropic
+   - name: anthropic/claude-v1.3
+     model_name: anthropic/claude-v1.3
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 8000
+     max_sequence_and_generated_tokens_length: 9016
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+   - name: anthropic/claude-instant-v1
+     model_name: anthropic/claude-instant-v1
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 8000
+     max_sequence_and_generated_tokens_length: 9016
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+   - name: anthropic/claude-instant-1.2
+     model_name: anthropic/claude-instant-1.2
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 8000
+     max_sequence_and_generated_tokens_length: 9016
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+   - name: anthropic/claude-2.0
+     model_name: anthropic/claude-2.0
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 8000
+     max_sequence_and_generated_tokens_length: 9016
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+   - name: anthropic/claude-2.1
+     model_name: anthropic/claude-2.1
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 8000
+     max_sequence_and_generated_tokens_length: 9016
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicClient"
+
+   - name: anthropic/claude-3-sonnet-20240229
+     model_name: anthropic/claude-3-sonnet-20240229
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 200000
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+   - name: anthropic/claude-3-haiku-20240307
+     model_name: anthropic/claude-3-haiku-20240307
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 200000
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+   - name: anthropic/claude-3-opus-20240229
+     model_name: anthropic/claude-3-opus-20240229
+     tokenizer_name: anthropic/claude
+     max_sequence_length: 200000
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
+
+   - name: anthropic/stanford-online-all-v4-s3
+     deprecated: true # Closed model, not accessible via API
+     model_name: anthropic/stanford-online-all-v4-s3
+     tokenizer_name: huggingface/gpt2
+     max_sequence_length: 8192
+     client_spec:
+       class_name: "helm.clients.anthropic_client.AnthropicLegacyClient"
+
+   # Cohere
+   - name: cohere/xlarge-20220609
+     model_name: cohere/xlarge-20220609
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2047
+     max_request_length: 2048
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/large-20220720
+     model_name: cohere/large-20220720
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2047
+     max_request_length: 2048
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/medium-20220720
+     model_name: cohere/medium-20220720
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2047
+     max_request_length: 2048
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/small-20220720
+     model_name: cohere/small-20220720
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2047
+     max_request_length: 2048
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/xlarge-20221108
+     model_name: cohere/xlarge-20221108
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2047
+     max_request_length: 2048
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/medium-20221108
+     model_name: cohere/medium-20221108
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2047
+     max_request_length: 2048
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/command-medium-beta
+     model_name: cohere/command-medium-beta
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2019
+     max_request_length: 2020
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/command-xlarge-beta
+     model_name: cohere/command-xlarge-beta
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2019
+     max_request_length: 2020
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/command
+     model_name: cohere/command
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2019 # TODO: verify this
+     max_request_length: 2020 # TODO: verify this
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
+   - name: cohere/command-light
+     model_name: cohere/command-light
+     tokenizer_name: cohere/cohere
+     max_sequence_length: 2019 # TODO: verify this
+     max_request_length: 2020 # TODO: verify this
+     client_spec:
+       class_name: "helm.clients.cohere_client.CohereClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
+
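Three window fields appear in the entries above: max_sequence_length bounds the prompt, max_request_length (for the Cohere deployments, one more than max_sequence_length) bounds a single request, and max_sequence_and_generated_tokens_length (for the Anthropic and several Google deployments) caps prompt and completion together. A small sketch of the completion budget these fields imply, assuming they are enforced jointly (illustrative only, not HELM's window-service code):

    from typing import Optional

    def completion_budget(prompt_tokens: int,
                          max_sequence_length: int,
                          joint_cap: Optional[int] = None) -> int:
        # Illustrative sketch of the window math, not HELM's window-service logic.
        if prompt_tokens > max_sequence_length:
            raise ValueError("prompt exceeds the deployment's context window")
        if joint_cap is not None:
            # e.g. anthropic/claude-2.1 above: a full 8000-token prompt still
            # leaves 9016 - 8000 = 1016 tokens for the completion.
            return joint_cap - prompt_tokens
        # With no joint cap, prompt and completion share max_sequence_length.
        return max_sequence_length - prompt_tokens

    print(completion_budget(8000, 8000, joint_cap=9016))  # 1016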
+   # Craiyon
+
+   - name: craiyon/dalle-mini
+     model_name: craiyon/dalle-mini
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+   - name: craiyon/dalle-mega
+     model_name: craiyon/dalle-mega
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+   # Databricks
+
+   - name: together/dbrx-instruct
+     model_name: databricks/dbrx-instruct
+     tokenizer_name: databricks/dbrx-instruct
+     max_sequence_length: 32767
+     client_spec:
+       class_name: "helm.clients.together_client.TogetherClient"
+
+   # DeepFloyd
+
+   - name: DeepFloyd/IF-I-M-v1.0
+     model_name: DeepFloyd/IF-I-M-v1.0
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+   - name: DeepFloyd/IF-I-L-v1.0
+     model_name: DeepFloyd/IF-I-L-v1.0
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+   - name: DeepFloyd/IF-I-XL-v1.0
+     model_name: DeepFloyd/IF-I-XL-v1.0
+     tokenizer_name: openai/clip-vit-large-patch14
+     max_sequence_length: 75
+     client_spec:
+       class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
+     window_service_spec:
+       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+   # Deepseek
+
+   - name: together/deepseek-llm-67b-chat
+     model_name: deepseek-ai/deepseek-llm-67b-chat
+     tokenizer_name: deepseek-ai/deepseek-llm-67b-chat
+     max_sequence_length: 4095
+     client_spec:
+       class_name: "helm.clients.together_client.TogetherClient"
+
+   # Gooseai
+
+   # TODO: Migrate these models to use OpenAIClient
+
+   ## EleutherAI
+   # - name: gooseai/gpt-neo-20b
+   #   model_name: eleutherai/gpt-neox-20b
+   #   tokenizer_name: EleutherAI/gpt-neox-20b
+   #   max_sequence_length: 2048
+   #   max_request_length: 2049
+   #   client_spec:
+   #     class_name: "helm.clients.goose_ai_client.GooseAIClient"
+
+   # - name: gooseai/gpt-j-6b
+   #   model_name: eleutherai/gpt-j-6b
+   #   tokenizer_name: EleutherAI/gpt-j-6B
+   #   max_sequence_length: 2048
+   #   max_request_length: 2049
+   #   client_spec:
+   #     class_name: "helm.clients.goose_ai_client.GooseAIClient"
+
+   # Google
+   # See: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/model-versioning
+
+   ## Gemini
+   # See: https://ai.google.dev/models/gemini#model_variations
+   - name: google/gemini-pro
+     model_name: google/gemini-pro
+     tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+     max_sequence_length: 30720
+     max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
+     client_spec:
+       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+   - name: google/gemini-1.0-pro-001
+     model_name: google/gemini-1.0-pro-001
+     tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+     max_sequence_length: 30720
+     max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
+     client_spec:
+       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+   - name: google/gemini-pro-vision
+     model_name: google/gemini-pro-vision
+     tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+     max_sequence_length: 12288
+     max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models
+     client_spec:
+       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+   - name: google/gemini-1.0-pro-vision-001
+     model_name: google/gemini-1.0-pro-vision-001
+     tokenizer_name: hf-internal-testing/llama-tokenizer
+     max_sequence_length: 12288
+     max_sequence_and_generated_tokens_length: 16384
+     client_spec:
+       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
+   - name: google/gemini-1.5-pro-preview-0409
+     model_name: google/gemini-1.5-pro-preview-0409
+     tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+     max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
+     # TODO: Max output tokens: 8192
+     client_spec:
+       class_name: "helm.clients.vertexai_client.VertexAIChatClient"
+
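The Gemini entries above note that Gemini exposes no tokenizer endpoint, so the config counts tokens with Gemma's tokenizer as a stand-in. A sketch of that approximation with Hugging Face transformers, assuming access to the google/gemma-2b tokenizer (the counts are estimates, not Gemini's true tokenization):

    from transformers import AutoTokenizer

    # Stand-in tokenizer, as in the Gemini deployments above.
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")

    def approximate_token_count(text: str) -> int:
        # Estimate only: Gemini's real tokenizer may count differently.
        return len(tokenizer.encode(text, add_special_tokens=False))

    # Check a prompt against gemini-pro's configured window of 30720 tokens.
    prompt = "Summarize the HELM benchmark in one paragraph."
    assert approximate_token_count(prompt) <= 30720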
461
+ ## Gemma
462
+ - name: together/gemma-2b
463
+ model_name: google/gemma-2b
464
+ tokenizer_name: google/gemma-2b
465
+ max_sequence_length: 7167
466
+ client_spec:
467
+ class_name: "helm.clients.together_client.TogetherClient"
468
+
469
+ - name: together/gemma-2b-it
470
+ model_name: google/gemma-2b-it
471
+ tokenizer_name: google/gemma-2b
472
+ max_sequence_length: 7167
473
+ client_spec:
474
+ class_name: "helm.clients.together_client.TogetherClient"
475
+
476
+ - name: together/gemma-7b
477
+ model_name: google/gemma-7b
478
+ tokenizer_name: google/gemma-2b
479
+ max_sequence_length: 7167
480
+ client_spec:
481
+ class_name: "helm.clients.together_client.TogetherClient"
482
+
483
+ - name: together/gemma-7b-it
484
+ model_name: google/gemma-7b-it
485
+ tokenizer_name: google/gemma-2b
486
+ max_sequence_length: 7167
487
+ client_spec:
488
+ class_name: "helm.clients.together_client.TogetherClient"
489
+
490
+ ## PaLM 2
491
+ - name: google/text-bison@001
492
+ model_name: google/text-bison@001
493
+ tokenizer_name: google/text-bison@001
494
+ max_sequence_length: 6000 # Officially 8192
495
+ max_sequence_and_generated_tokens_length: 7000 # Officially 9216
496
+ client_spec:
497
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
498
+ window_service_spec:
499
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
500
+
501
+ - name: google/text-bison@002
+   model_name: google/text-bison@002
+   tokenizer_name: google/text-bison@002
+   max_sequence_length: 6000 # Officially 8192
+   max_sequence_and_generated_tokens_length: 9216
+   client_spec:
+     class_name: "helm.clients.vertexai_client.VertexAITextClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+ - name: google/text-bison-32k
+   model_name: google/text-bison-32k
+   tokenizer_name: google/text-bison@001
+   max_sequence_length: 32000
+   max_sequence_and_generated_tokens_length: 32000
+   client_spec:
+     class_name: "helm.clients.vertexai_client.VertexAITextClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+ - name: google/text-unicorn@001
+   model_name: google/text-unicorn@001
+   tokenizer_name: google/text-unicorn@001
+   max_sequence_length: 6000 # Officially 8192
+   max_sequence_and_generated_tokens_length: 7000 # Officially 9216
+   client_spec:
+     class_name: "helm.clients.vertexai_client.VertexAITextClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+ - name: google/code-bison@001
+   model_name: google/code-bison@001
+   tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+   max_sequence_length: 6000 # Officially 6144
+   max_sequence_and_generated_tokens_length: 7000 # Officially 7168
+   client_spec:
+     class_name: "helm.clients.vertexai_client.VertexAITextClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+ - name: google/code-bison@002
+   model_name: google/code-bison@002
+   tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+   max_sequence_length: 6000 # Officially 6144
+   max_sequence_and_generated_tokens_length: 7168
+   client_spec:
+     class_name: "helm.clients.vertexai_client.VertexAITextClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+ - name: google/code-bison-32k
+   model_name: google/code-bison-32k
+   tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+   max_sequence_length: 32000
+   max_sequence_and_generated_tokens_length: 32000
+   client_spec:
+     class_name: "helm.clients.vertexai_client.VertexAITextClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
+
+ # HuggingFace
+
+ ## BigCode
+ - name: huggingface/santacoder
+   model_name: bigcode/santacoder
+   tokenizer_name: bigcode/santacoder
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/starcoder
+   model_name: bigcode/starcoder
+   tokenizer_name: bigcode/starcoder
+   max_sequence_length: 8192
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ ## Databricks
+ - name: huggingface/dolly-v2-3b
+   model_name: databricks/dolly-v2-3b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/dolly-v2-7b
+   model_name: databricks/dolly-v2-7b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/dolly-v2-12b
+   model_name: databricks/dolly-v2-12b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ ## EleutherAI
+ - name: huggingface/pythia-1b-v0
+   model_name: eleutherai/pythia-1b-v0
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/pythia-2.8b-v0
+   model_name: eleutherai/pythia-2.8b-v0
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/pythia-6.9b
+   model_name: eleutherai/pythia-6.9b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/pythia-12b-v0
+   model_name: eleutherai/pythia-12b-v0
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/gpt-j-6b
+   model_name: eleutherai/gpt-j-6b
+   tokenizer_name: EleutherAI/gpt-j-6B
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
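A recurring pattern worth noting: wherever `max_request_length` appears, it is exactly `max_sequence_length + 1` (2049 vs. 2048 here, and likewise 1025/1024 and 4001/4000 later in the file). The extra token presumably reflects completion-style APIs that accept prompt plus generated tokens one past the context window in a single request; this is a reading of the configs rather than a documented rule, so copy the pattern, not the explanation:

# Illustrative pattern only (not a new entry):
#   max_sequence_length: 2048   # context window
#   max_request_length: 2049    # always max_sequence_length + 1 in this file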
+ - name: huggingface/gpt-neox-20b
+   model_name: eleutherai/gpt-neox-20b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ ## LMSYS
+ - name: huggingface/vicuna-7b-v1.3
+   model_name: lmsys/vicuna-7b-v1.3
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/vicuna-13b-v1.3
+   model_name: lmsys/vicuna-13b-v1.3
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ ## Meta
+ - name: huggingface/opt-175b
+   model_name: meta/opt-175b
+   tokenizer_name: facebook/opt-66b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: facebook/opt-175b
+
+ - name: huggingface/opt-66b
+   model_name: meta/opt-66b
+   tokenizer_name: facebook/opt-66b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: facebook/opt-66b
+
+ - name: huggingface/opt-6.7b
+   model_name: meta/opt-6.7b
+   tokenizer_name: facebook/opt-66b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: facebook/opt-6.7b
+
+ - name: huggingface/opt-1.3b
+   model_name: meta/opt-1.3b
+   tokenizer_name: facebook/opt-66b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: facebook/opt-1.3b
+
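`client_spec.args` is forwarded to the client, so a deployment can point at a different Hugging Face repository than its HELM `model_name` suggests; the OPT entries above use `pretrained_model_name_or_path` exactly this way. A minimal sketch for serving an arbitrary checkpoint (the deployment name and path below are hypothetical placeholders):

# Hypothetical example only -- "mylab/my-model" and the path are placeholders.
- name: huggingface/my-model
  model_name: mylab/my-model
  tokenizer_name: EleutherAI/gpt-neox-20b # assumption: borrow a tokenizer, as several entries above do
  max_sequence_length: 2048
  client_spec:
    class_name: "helm.clients.huggingface_client.HuggingFaceClient"
    args:
      pretrained_model_name_or_path: /path/to/local/checkpoint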
+ ## Microsoft
+ - name: huggingface/llava-1.5-7b-hf
+   model_name: microsoft/llava-1.5-7b-hf
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+ - name: huggingface/llava-1.5-13b-hf
+   model_name: microsoft/llava-1.5-13b-hf
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+ ## OpenFlamingo
+ - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+   model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+   tokenizer_name: anas-awadalla/mpt-7b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.open_flamingo_client.OpenFlamingoClient"
+     args:
+       checkpoint_path: "openflamingo/OpenFlamingo-9B-vitl-mpt7b"
+       tokenizer_name: "anas-awadalla-2/mpt-7b"
+       cross_attn_every_n_layers: 4
+
+ - name: together/phi-2
+   model_name: microsoft/phi-2
+   tokenizer_name: microsoft/phi-2
+   max_sequence_length: 2047
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ ## Mistral AI
+ - name: huggingface/bakLlava-v1-hf
+   model_name: mistralai/bakLlava-v1-hf
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+ ## MosaicML
+ - name: huggingface/mpt-7b
+   model_name: mosaicml/mpt-7b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: mosaicml/mpt-7b
+
+ - name: huggingface/mpt-instruct-7b
+   model_name: mosaicml/mpt-instruct-7b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
+
+ - name: huggingface/mpt-30b
+   model_name: mosaicml/mpt-30b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/mpt-instruct-30b
+   model_name: mosaicml/mpt-instruct-30b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: mosaicml/mpt-30b-instruct
+
+ ## OpenAI
+ - name: huggingface/gpt2
+   model_name: openai/gpt2
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 1024
+   max_request_length: 1025
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+     args:
+       pretrained_model_name_or_path: openai-community/gpt2
+
+ ## StabilityAI
+ - name: huggingface/stablelm-base-alpha-3b
+   model_name: stabilityai/stablelm-base-alpha-3b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 4096
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ - name: huggingface/stablelm-base-alpha-7b
+   model_name: stabilityai/stablelm-base-alpha-7b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 4096
+   client_spec:
+     class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+
+ ## Text-to-Image Diffusion Models
+
+ - name: huggingface/dreamlike-diffusion-v1-0
+   model_name: huggingface/dreamlike-diffusion-v1-0
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/dreamlike-photoreal-v2-0
+   model_name: huggingface/dreamlike-photoreal-v2-0
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/openjourney-v1-0
+   model_name: huggingface/openjourney-v1-0
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/openjourney-v2-0
+   model_name: huggingface/openjourney-v2-0
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/redshift-diffusion
+   model_name: huggingface/redshift-diffusion
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/promptist-stable-diffusion-v1-4
+   model_name: huggingface/promptist-stable-diffusion-v1-4
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-v1-4
+   model_name: huggingface/stable-diffusion-v1-4
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-v1-5
+   model_name: huggingface/stable-diffusion-v1-5
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-v2-base
+   model_name: huggingface/stable-diffusion-v2-base
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-v2-1-base
+   model_name: huggingface/stable-diffusion-v2-1-base
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-safe-weak
+   model_name: huggingface/stable-diffusion-safe-weak
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-safe-medium
+   model_name: huggingface/stable-diffusion-safe-medium
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-safe-strong
+   model_name: huggingface/stable-diffusion-safe-strong
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/stable-diffusion-safe-max
+   model_name: huggingface/stable-diffusion-safe-max
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: huggingface/vintedois-diffusion-v0-1
+   model_name: huggingface/vintedois-diffusion-v0-1
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: segmind/Segmind-Vega
+   model_name: segmind/Segmind-Vega
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: segmind/SSD-1B
+   model_name: segmind/SSD-1B
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ - name: stabilityai/stable-diffusion-xl-base-1.0
+   model_name: stabilityai/stable-diffusion-xl-base-1.0
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ # HuggingFaceM4
+ - name: HuggingFaceM4/idefics-9b
+   model_name: HuggingFaceM4/idefics-9b
+   tokenizer_name: HuggingFaceM4/idefics-9b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+ - name: HuggingFaceM4/idefics-9b-instruct
+   model_name: HuggingFaceM4/idefics-9b-instruct
+   tokenizer_name: HuggingFaceM4/idefics-9b-instruct
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+ - name: HuggingFaceM4/idefics-80b
+   model_name: HuggingFaceM4/idefics-80b
+   tokenizer_name: HuggingFaceM4/idefics-80b
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+ - name: HuggingFaceM4/idefics-80b-instruct
+   model_name: HuggingFaceM4/idefics-80b-instruct
+   tokenizer_name: HuggingFaceM4/idefics-80b-instruct
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
+
+ # Lexica
+ - name: lexica/search-stable-diffusion-1.5
+   model_name: lexica/search-stable-diffusion-1.5
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 200
+   client_spec:
+     class_name: "helm.clients.image_generation.lexica_client.LexicaClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.lexica_search_window_service.LexicaSearchWindowService"
+
+ # Kakao
+ - name: kakaobrain/mindall-e
+   model_name: kakaobrain/mindall-e
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.mindalle_client.MinDALLEClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ # Lightning AI
+ - name: lightningai/lit-gpt
+   model_name: lightningai/lit-gpt
+   tokenizer_name: lightningai/lit-gpt
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.lit_gpt_client.LitGPTClient"
+     args:
+       checkpoint_dir: "" # Path to the checkpoint directory
+       precision: bf16-true
+
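The `lit-gpt` entry ships with `checkpoint_dir: ""` as a deliberate placeholder: the client cannot guess where a local Lit-GPT checkpoint lives, so the empty string must be filled in before the deployment is usable. For instance (the path below is a hypothetical placeholder):

# Hypothetical local configuration; supply your own checkpoint path.
client_spec:
  class_name: "helm.clients.lit_gpt_client.LitGPTClient"
  args:
    checkpoint_dir: "/checkpoints/lit-gpt/my-model"
    precision: bf16-true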
+ # Mistral AI
+ - name: mistralai/mistral-tiny
+   model_name: mistralai/mistral-7b-v0.1
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 32000
+   client_spec:
+     class_name: "helm.clients.mistral_client.MistralAIClient"
+     args:
+       mistral_model: "mistral-tiny"
+
+ - name: mistralai/mistral-small-2402
+   model_name: mistralai/mistral-small-2402
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 32000
+   client_spec:
+     class_name: "helm.clients.mistral_client.MistralAIClient"
+
+ - name: mistralai/mistral-medium-2312
+   model_name: mistralai/mistral-medium-2312
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 32000
+   client_spec:
+     class_name: "helm.clients.mistral_client.MistralAIClient"
+
+ - name: mistralai/mistral-large-2402
+   model_name: mistralai/mistral-large-2402
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 32000
+   client_spec:
+     class_name: "helm.clients.mistral_client.MistralAIClient"
+
+ # NeurIPS
+ - name: neurips/local
+   model_name: neurips/local
+   tokenizer_name: neurips/local
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.http_model_client.HTTPModelClient"
+
+ # Nvidia
+ - name: nvidia/megatron-gpt2
+   model_name: nvidia/megatron-gpt2
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 1024
+   client_spec:
+     class_name: "helm.clients.megatron_client.MegatronClient"
+
+ # OpenAI
+
+ ## GPT 3 Models
+
+ - name: openai/davinci-002
+   model_name: openai/davinci-002
+   tokenizer_name: openai/cl100k_base
+   # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
+   # to provide a margin of error.
+   max_sequence_length: 16000
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/babbage-002
+   model_name: openai/babbage-002
+   tokenizer_name: openai/cl100k_base
+   # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
+   # to provide a margin of error.
+   max_sequence_length: 16000
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
+ # DEPRECATED: Announced on July 06, 2023 that these models would be shut down on January 04, 2024.
+
+ - name: openai/davinci
+   deprecated: true
+   model_name: openai/davinci
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
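Retired backends are not deleted from this file; they are flagged with `deprecated: true` and a comment explaining why (a provider shutdown, or removal from Together), which presumably keeps old run specs resolvable without advertising the model for new runs. The flag simply sits alongside the usual fields:

# Illustrative pattern only; mirrors the deprecated entries around it.
- name: openai/some-retired-model # hypothetical name
  deprecated: true # announced shutdown; entry kept for reproducibility
  model_name: openai/some-retired-model
  tokenizer_name: huggingface/gpt2
  max_sequence_length: 2048
  max_request_length: 2049
  client_spec:
    class_name: "helm.clients.openai_client.OpenAIClient"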
+ - name: openai/curie
+   deprecated: true
+   model_name: openai/curie
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/babbage
+   deprecated: true
+   model_name: openai/babbage
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/ada
+   deprecated: true
+   model_name: openai/ada
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-davinci-003
+   deprecated: true
+   model_name: openai/text-davinci-003
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 4000
+   max_request_length: 4001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-davinci-002
+   deprecated: true
+   model_name: openai/text-davinci-002
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 4000
+   max_request_length: 4001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-davinci-001
+   deprecated: true
+   model_name: openai/text-davinci-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-curie-001
+   deprecated: true
+   model_name: openai/text-curie-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-babbage-001
+   deprecated: true
+   model_name: openai/text-babbage-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-ada-001
+   deprecated: true
+   model_name: openai/text-ada-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ ## GPT 3.5 Turbo Models
+ # ChatGPT: https://openai.com/blog/chatgpt
+
+ - name: openai/gpt-3.5-turbo-instruct
+   model_name: openai/gpt-3.5-turbo-instruct
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 4096
+   max_request_length: 4097
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+ # sequence length is smaller at 4087 with one user input message and one assistant
+ # output message because ChatGPT uses special tokens for message roles and boundaries.
+ # We use a rounded-down sequence length of 4000 to account for these special tokens.
+ - name: openai/gpt-3.5-turbo-0301
+   model_name: openai/gpt-3.5-turbo-0301
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 4000
+   max_request_length: 4001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+ # sequence length is smaller at 4087 with one user input message and one assistant
+ # output message because ChatGPT uses special tokens for message roles and boundaries.
+ # We use a rounded-down sequence length of 4000 to account for these special tokens.
+ - name: openai/gpt-3.5-turbo-0613
+   model_name: openai/gpt-3.5-turbo-0613
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 4000
+   max_request_length: 4001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
+ # in the openai/gpt-3.5-turbo-0613 comment
+ - name: openai/gpt-3.5-turbo-16k-0613
+   model_name: openai/gpt-3.5-turbo-16k-0613
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 16000
+   max_request_length: 16001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
+ # in the openai/gpt-3.5-turbo-0613 comment
+ - name: openai/gpt-3.5-turbo-1106
+   model_name: openai/gpt-3.5-turbo-1106
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 16000
+   max_request_length: 16001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
+ # in the openai/gpt-3.5-turbo-0613 comment
+ - name: openai/gpt-3.5-turbo-0125
+   model_name: openai/gpt-3.5-turbo-0125
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 16000
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ ## GPT 4 Models
+
+ - name: openai/gpt-4-1106-preview
+   model_name: openai/gpt-4-1106-preview
+   tokenizer_name: openai/cl100k_base
+   # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo,
+   # the maximum number of output tokens for this model is 4096
+   # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098
+   max_sequence_length: 128000
+   max_request_length: 128001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-0314
+   model_name: openai/gpt-4-0314
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 8192
+   max_request_length: 8193
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-32k-0314
+   model_name: openai/gpt-4-32k-0314
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 32768
+   max_request_length: 32769
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-0613
+   model_name: openai/gpt-4-0613
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 8192
+   max_request_length: 8193
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-32k-0613
+   model_name: openai/gpt-4-32k-0613
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 32768
+   max_request_length: 32769
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-0125-preview
+   model_name: openai/gpt-4-0125-preview
+   tokenizer_name: openai/cl100k_base
+   # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo,
+   # the maximum number of output tokens for this model is 4096
+   # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098
+   max_sequence_length: 128000
+   max_request_length: 128001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-turbo-2024-04-09
+   model_name: openai/gpt-4-turbo-2024-04-09
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 128000
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/gpt-4-vision-preview
+   model_name: openai/gpt-4-vision-preview
+   tokenizer_name: openai/cl100k_base
+   max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+   max_request_length: 128001
+   max_sequence_and_generated_tokens_length: 132096
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ ## Codex Models
+ # DEPRECATED: Codex models were shut down on March 23, 2023.
+
+ - name: openai/code-davinci-002
+   deprecated: true
+   model_name: openai/code-davinci-002
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 4000
+   max_request_length: 4001
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/code-davinci-001
+   deprecated: true
+   model_name: openai/code-davinci-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/code-cushman-001
+   deprecated: true
+   model_name: openai/code-cushman-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ ## Text Similarity Models
+ # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
+ # The number of parameters is guessed based on the number of parameters of the
+ # corresponding GPT-3 model.
+ # DEPRECATED: Announced on July 06, 2023 that first-generation embedding models
+ # would be shut down on January 04, 2024.
+
+ - name: openai/text-similarity-davinci-001
+   deprecated: true
+   model_name: openai/text-similarity-davinci-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-similarity-curie-001
+   deprecated: true
+   model_name: openai/text-similarity-curie-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-similarity-babbage-001
+   deprecated: true
+   model_name: openai/text-similarity-babbage-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ - name: openai/text-similarity-ada-001
+   deprecated: true
+   model_name: openai/text-similarity-ada-001
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # As of 2023-11-07, text-embedding-ada-002 is not deprecated:
+ # "We recommend using text-embedding-ada-002 for nearly all use cases."
+ # Source: https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
+ - name: openai/text-embedding-ada-002
+   model_name: openai/text-embedding-ada-002
+   tokenizer_name: huggingface/gpt2
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.openai_client.OpenAIClient"
+
+ # Text-to-image models
+ - name: openai/dall-e-2
+   model_name: openai/dall-e-2
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 1000
+   client_spec:
+     class_name: "helm.clients.image_generation.dalle2_client.DALLE2Client"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
+
+ - name: openai/dall-e-3
+   model_name: openai/dall-e-3
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 1000
+   client_spec:
+     class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
+
+ - name: openai/dall-e-3-natural
+   model_name: openai/dall-e-3-natural
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 1000
+   client_spec:
+     class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
+
+ - name: openai/dall-e-3-hd
+   model_name: openai/dall-e-3-hd
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 1000
+   client_spec:
+     class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
+
+ - name: openai/dall-e-3-hd-natural
+   model_name: openai/dall-e-3-hd-natural
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 1000
+   client_spec:
+     class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
+
+ # Together
+ # The list of models served by Together changes often; to check the latest list, visit:
+ # https://docs.together.ai/docs/inference-models
+ # You can also use the playground to verify that the live models are working:
+ # https://api.together.xyz/playground
+
+ ## BigScience
+ - name: together/bloom
+   deprecated: true # Removed from Together
+   model_name: bigscience/bloom
+   tokenizer_name: bigscience/bloom
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/t0pp
+   deprecated: true # Removed from Together
+   model_name: bigscience/t0pp
+   tokenizer_name: bigscience/T0pp
+   max_sequence_length: 1024
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
+
+ ## Google
+ - name: together/t5-11b
+   deprecated: true # Removed from Together
+   model_name: google/t5-11b
+   tokenizer_name: google/t5-11b
+   max_sequence_length: 511
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
+
+ - name: together/flan-t5-xxl
+   deprecated: true # Removed from Together
+   model_name: google/flan-t5-xxl
+   tokenizer_name: google/flan-t5-xxl
+   max_sequence_length: 511
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
+
+ - name: together/ul2
+   deprecated: true # Removed from Together
+   model_name: google/ul2
+   tokenizer_name: google/ul2
+   max_sequence_length: 511
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
+
+ ## Meta
+ - name: together/llama-7b
+   model_name: meta/llama-7b
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: huggyllama/llama-7b
+
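The shaved-down sequence lengths in this section are deliberate workarounds rather than typos: per the comments, Together's input validation miscounts tokens by one (and, for Llama 2, by two), so each configured limit is the model's context window minus the miscount. The arithmetic, restated from the entries and comments here:

# Worked examples of the workaround arithmetic:
#   llama-7b:        2048 - 1 = 2047   (off-by-one, #2080)
#   llama-2-7b:      4096 - 2 = 4094   (off-by-two, #2080 and #2094)
#   mistral-7b-v0.1: 4096 - 1 = 4095   (off-by-one, #2080)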
+ - name: together/llama-13b
+   model_name: meta/llama-13b
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: huggyllama/llama-13b
+
+ - name: together/llama-30b
+   model_name: meta/llama-30b
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: huggyllama/llama-30b
+
+ - name: together/llama-65b
+   model_name: meta/llama-65b
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: huggyllama/llama-65b
+
+ - name: together/llama-2-7b
+   model_name: meta/llama-2-7b
+   tokenizer_name: meta-llama/Llama-2-7b-hf
+   max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/llama-2-7b
+
+ - name: together/llama-2-13b
+   model_name: meta/llama-2-13b
+   tokenizer_name: meta-llama/Llama-2-7b-hf
+   max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/llama-2-13b
+
+ - name: together/llama-2-70b
+   model_name: meta/llama-2-70b
+   tokenizer_name: meta-llama/Llama-2-7b-hf
+   max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/llama-2-70b
+
+ - name: together/llama-3-8b
+   model_name: meta/llama-3-8b
+   tokenizer_name: meta/llama-3-8b
+   max_sequence_length: 8191
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: meta-llama/Meta-Llama-3-8B
+
+ - name: together/llama-3-70b
+   model_name: meta/llama-3-70b
+   tokenizer_name: meta/llama-3-8b
+   max_sequence_length: 8191
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: meta-llama/Meta-Llama-3-70B
+
+ ## 01.AI
+ - name: together/yi-6b
+   model_name: 01-ai/yi-6b
+   tokenizer_name: 01-ai/Yi-6B
+   max_sequence_length: 4095
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: zero-one-ai/Yi-6B
+
+ - name: together/yi-34b
+   model_name: 01-ai/yi-34b
+   tokenizer_name: 01-ai/Yi-6B
+   max_sequence_length: 4095
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: zero-one-ai/Yi-34B
+
+ ## Allen Institute for AI
+ - name: together/olmo-7b
+   model_name: allenai/olmo-7b
+   tokenizer_name: allenai/olmo-7b
+   max_sequence_length: 2047
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/olmo-7b-twin-2t
+   model_name: allenai/olmo-7b-twin-2t
+   tokenizer_name: allenai/olmo-7b
+   max_sequence_length: 2047
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/olmo-7b-instruct
+   model_name: allenai/olmo-7b-instruct
+   tokenizer_name: allenai/olmo-7b
+   max_sequence_length: 2047
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ ## MistralAI
+ - name: together/mistral-7b-v0.1
+   model_name: mistralai/mistral-7b-v0.1
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: mistralai/Mistral-7B-v0.1
+
+ - name: together/mixtral-8x7b-32kseqlen
+   model_name: mistralai/mixtral-8x7b-32kseqlen
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: mistralai/mixtral-8x7b-32kseqlen
+
+ - name: together/mixtral-8x7b-instruct-v0.1
+   model_name: mistralai/mixtral-8x7b-instruct-v0.1
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/mixtral-8x22b
+   model_name: mistralai/mixtral-8x22b
+   tokenizer_name: mistralai/Mistral-7B-v0.1
+   max_sequence_length: 65535
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+
+ ## Stanford
+ - name: together/alpaca-7b
+   model_name: stanford/alpaca-7b
+   tokenizer_name: hf-internal-testing/llama-tokenizer
+   max_sequence_length: 2048
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/alpaca-7b
+
+ ## Tiiuae
+ - name: together/falcon-7b
+   model_name: tiiuae/falcon-7b
+   tokenizer_name: tiiuae/falcon-7b
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/falcon-7b
+
+ - name: together/falcon-7b-instruct
+   model_name: tiiuae/falcon-7b-instruct
+   tokenizer_name: tiiuae/falcon-7b
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/falcon-7b-instruct
+
+ - name: together/falcon-40b
+   model_name: tiiuae/falcon-40b
+   tokenizer_name: tiiuae/falcon-7b
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/falcon-40b
+
+ - name: together/falcon-40b-instruct
+   model_name: tiiuae/falcon-40b-instruct
+   tokenizer_name: tiiuae/falcon-7b
+   max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/falcon-40b-instruct
+
+ ## Together
+ # These are models fine-tuned by Together (and not simply hosted by Together).
+ - name: together/gpt-jt-6b-v1
+   model_name: together/gpt-jt-6b-v1
+   tokenizer_name: EleutherAI/gpt-j-6B
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/GPT-JT-6B-v1
+
+ - name: together/gpt-neoxt-chat-base-20b
+   model_name: together/gpt-neoxt-chat-base-20b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/GPT-NeoXT-Chat-Base-20B
+
+ - name: together/redpajama-incite-base-3b-v1
+   model_name: together/redpajama-incite-base-3b-v1
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/RedPajama-INCITE-Base-3B-v1
+
+ - name: together/redpajama-incite-instruct-3b-v1
+   model_name: together/redpajama-incite-instruct-3b-v1
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/RedPajama-INCITE-Instruct-3B-v1
+
+ - name: together/redpajama-incite-base-7b
+   model_name: together/redpajama-incite-base-7b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/RedPajama-INCITE-7B-Base
+
+ - name: together/redpajama-incite-instruct-7b
+   model_name: together/redpajama-incite-instruct-7b
+   tokenizer_name: EleutherAI/gpt-neox-20b
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/RedPajama-INCITE-7B-Instruct
+
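Throughout the Together section, `client_spec.args.together_model` decouples the stable, lowercase HELM deployment name from Together's own catalog identifier, which uses different casing (e.g. `together/gpt-jt-6b-v1` maps to `togethercomputer/GPT-JT-6B-v1`). A hedged sketch of the pattern for a hypothetical listing:

# Hypothetical example; "ExampleOrg/Example-3B" is not a real Together listing.
- name: together/example-3b
  model_name: exampleorg/example-3b
  tokenizer_name: EleutherAI/gpt-neox-20b # assumption: borrow a tokenizer, as the entries above do
  max_sequence_length: 2048
  client_spec:
    class_name: "helm.clients.together_client.TogetherClient"
    args:
      together_model: ExampleOrg/Example-3B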
+ ## Tsinghua
+ - name: together/glm
+   deprecated: true # Removed from Together
+   model_name: tsinghua/glm
+   tokenizer_name: TsinghuaKEG/ice
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.ice_window_service.ICEWindowService"
+
+ - name: thudm/cogview2
+   model_name: thudm/cogview2
+   tokenizer_name: openai/clip-vit-large-patch14
+   max_sequence_length: 75
+   client_spec:
+     class_name: "helm.clients.image_generation.cogview2_client.CogView2Client"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
+
+ ## Yandex
+ - name: together/yalm
+   deprecated: true # Removed from Together
+   model_name: yandex/yalm
+   tokenizer_name: Yandex/yalm
+   max_sequence_length: 2048
+   max_request_length: 2049
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+   window_service_spec:
+     class_name: "helm.benchmark.window_services.yalm_window_service.YaLMWindowService"
+
+ # Writer
+ - name: writer/palmyra-base
+   model_name: writer/palmyra-base
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 2048
+   max_sequence_and_generated_tokens_length: 2048
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-large
+   model_name: writer/palmyra-large
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 2048
+   max_sequence_and_generated_tokens_length: 2048
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-instruct-30
+   model_name: writer/palmyra-instruct-30
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 2048
+   max_sequence_and_generated_tokens_length: 2048
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-e
+   model_name: writer/palmyra-e
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 2048
+   max_sequence_and_generated_tokens_length: 2048
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/silk-road
+   model_name: writer/silk-road
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 8192
+   max_sequence_and_generated_tokens_length: 8192
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-x
+   model_name: writer/palmyra-x
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 8192
+   max_sequence_and_generated_tokens_length: 8192
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-x-v2
+   model_name: writer/palmyra-x-v2
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 6000
+   max_sequence_and_generated_tokens_length: 7024
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-x-v3
+   model_name: writer/palmyra-x-v3
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 6000
+   max_sequence_and_generated_tokens_length: 7024
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ - name: writer/palmyra-x-32k
+   model_name: writer/palmyra-x-32k
+   tokenizer_name: writer/gpt2
+   max_sequence_length: 28000
+   max_sequence_and_generated_tokens_length: 30048
+   client_spec:
+     class_name: "helm.clients.palmyra_client.PalmyraClient"
+
+ # Qwen
+
+ - name: together/qwen-7b
+   model_name: qwen/qwen-7b
+   tokenizer_name: qwen/qwen-7b
+   max_sequence_length: 8191
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: togethercomputer/Qwen-7B
+
+ - name: together/qwen1.5-7b
+   model_name: qwen/qwen1.5-7b
+   tokenizer_name: qwen/qwen1.5-7b
+   max_sequence_length: 32767
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: Qwen/Qwen1.5-7B
+
+ - name: together/qwen1.5-14b
+   model_name: qwen/qwen1.5-14b
+   tokenizer_name: qwen/qwen1.5-7b
+   max_sequence_length: 32767
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: Qwen/Qwen1.5-14B
+
+ - name: together/qwen1.5-32b
+   model_name: qwen/qwen1.5-32b
+   tokenizer_name: qwen/qwen1.5-7b
+   max_sequence_length: 32767
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: Qwen/Qwen1.5-32B
+
+ - name: together/qwen1.5-72b
+   model_name: qwen/qwen1.5-72b
+   tokenizer_name: qwen/qwen1.5-7b
+   max_sequence_length: 4095
+   client_spec:
+     class_name: "helm.clients.together_client.TogetherClient"
+     args:
+       together_model: Qwen/Qwen1.5-72B
+
+ - name: huggingface/qwen-vl
+   model_name: qwen/qwen-vl
+   tokenizer_name: qwen/qwen-vl
+   max_sequence_length: 8191
+   client_spec:
+     class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"
+
+ - name: huggingface/qwen-vl-chat
+   model_name: qwen/qwen-vl-chat
+   tokenizer_name: qwen/qwen-vl-chat
+   max_sequence_length: 8191
+   client_spec:
+     class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"