crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of crfm-helm might be problematic.

Files changed (499)
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
  2. crfm_helm-0.5.1.dist-info/RECORD +654 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  11. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  12. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  13. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  14. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  15. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  16. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  17. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  18. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  19. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  20. helm/benchmark/adaptation/request_state.py +6 -1
  21. helm/benchmark/adaptation/scenario_state.py +6 -2
  22. helm/benchmark/annotation/annotator.py +43 -0
  23. helm/benchmark/annotation/annotator_factory.py +61 -0
  24. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  25. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  26. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  27. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  28. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  29. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  30. helm/benchmark/annotation_executor.py +124 -0
  31. helm/benchmark/augmentations/data_augmenter.py +0 -2
  32. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  33. helm/benchmark/augmentations/perturbation.py +25 -3
  34. helm/benchmark/augmentations/perturbation_description.py +1 -1
  35. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  36. helm/benchmark/augmentations/test_perturbation.py +41 -7
  37. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  38. helm/benchmark/config_registry.py +7 -1
  39. helm/benchmark/executor.py +46 -16
  40. helm/benchmark/huggingface_registration.py +20 -7
  41. helm/benchmark/metrics/basic_metrics.py +169 -664
  42. helm/benchmark/metrics/bbq_metrics.py +3 -4
  43. helm/benchmark/metrics/bias_metrics.py +6 -6
  44. helm/benchmark/metrics/classification_metrics.py +11 -8
  45. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  46. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  47. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  48. helm/benchmark/metrics/common_metric_specs.py +167 -0
  49. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  50. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  51. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  52. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  53. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  54. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  55. helm/benchmark/metrics/efficiency_metrics.py +213 -0
  56. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  57. helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
  58. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  59. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  60. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  61. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  62. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  63. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  64. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  65. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  66. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  68. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  69. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  70. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  71. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  72. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  73. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  74. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  75. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  76. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  77. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  78. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  79. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  80. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  81. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  82. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  83. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  84. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  85. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  86. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  87. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  88. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  89. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  90. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  91. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  92. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  93. helm/benchmark/metrics/metric.py +93 -172
  94. helm/benchmark/metrics/metric_name.py +0 -1
  95. helm/benchmark/metrics/metric_service.py +16 -0
  96. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  97. helm/benchmark/metrics/ranking_metrics.py +2 -2
  98. helm/benchmark/metrics/reference_metric.py +148 -0
  99. helm/benchmark/metrics/summac/model_summac.py +0 -2
  100. helm/benchmark/metrics/summarization_metrics.py +2 -2
  101. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  102. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  103. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  104. helm/benchmark/metrics/test_metric.py +2 -2
  105. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  106. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  107. helm/benchmark/metrics/toxicity_utils.py +23 -0
  108. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  109. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  110. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  111. helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
  112. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  113. helm/benchmark/model_deployment_registry.py +74 -0
  114. helm/benchmark/model_metadata_registry.py +41 -1
  115. helm/benchmark/multi_gpu_runner.py +133 -0
  116. helm/benchmark/presentation/create_plots.py +8 -7
  117. helm/benchmark/presentation/run_display.py +26 -10
  118. helm/benchmark/presentation/schema.py +15 -40
  119. helm/benchmark/presentation/summarize.py +119 -79
  120. helm/benchmark/presentation/table.py +8 -8
  121. helm/benchmark/presentation/test_contamination.py +2 -2
  122. helm/benchmark/presentation/test_run_entry.py +1 -2
  123. helm/benchmark/presentation/test_summarize.py +3 -3
  124. helm/benchmark/run.py +54 -26
  125. helm/benchmark/run_expander.py +205 -35
  126. helm/benchmark/run_spec.py +93 -0
  127. helm/benchmark/run_spec_factory.py +163 -0
  128. helm/benchmark/run_specs/__init__.py +0 -0
  129. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  130. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  131. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  132. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  133. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  134. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  135. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  136. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  137. helm/benchmark/run_specs/vlm_run_specs.py +757 -0
  138. helm/benchmark/runner.py +51 -57
  139. helm/benchmark/runner_config_registry.py +21 -0
  140. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  141. helm/benchmark/scenarios/bold_scenario.py +2 -2
  142. helm/benchmark/scenarios/code_scenario.py +1 -0
  143. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  144. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  145. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  146. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  147. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  148. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  149. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  150. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  151. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  152. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  153. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  154. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  155. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  156. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  157. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  158. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  159. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  160. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  162. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  163. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  164. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  165. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  166. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  167. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  168. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  169. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  170. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  171. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  172. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  173. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  174. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  175. helm/benchmark/scenarios/math_scenario.py +19 -2
  176. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  177. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  178. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  179. helm/benchmark/scenarios/scenario.py +4 -0
  180. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  181. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  182. helm/benchmark/scenarios/test_scenario.py +6 -3
  183. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  184. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  185. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  186. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  187. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  188. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  189. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  190. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  191. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  192. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  193. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
  194. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  198. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  199. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  200. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  201. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  202. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  203. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  204. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  205. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  206. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  207. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  208. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  209. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  210. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  211. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  212. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  213. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  214. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  215. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  216. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  217. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  218. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  219. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
  220. helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
  221. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  222. helm/benchmark/server.py +24 -1
  223. helm/benchmark/slurm_runner.py +70 -49
  224. helm/benchmark/static/benchmarking.js +1 -1
  225. helm/benchmark/static/schema_classic.yaml +258 -1066
  226. helm/benchmark/static/schema_image2structure.yaml +304 -0
  227. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  228. helm/benchmark/static/schema_lite.yaml +2 -227
  229. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  230. helm/benchmark/static/schema_unitxt.yaml +428 -0
  231. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  232. helm/benchmark/static/schema_vlm.yaml +823 -0
  233. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  234. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  235. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  236. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  237. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  238. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  239. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  240. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  241. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  242. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  243. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  244. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  245. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  246. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  247. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  248. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  249. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  250. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  251. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  252. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  253. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  254. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  255. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  256. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  257. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  258. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  259. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  260. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  261. helm/benchmark/static_build/config.js +4 -0
  262. helm/benchmark/static_build/index.html +20 -0
  263. helm/benchmark/test_data_preprocessor.py +3 -3
  264. helm/benchmark/test_run_expander.py +1 -1
  265. helm/benchmark/window_services/ai21_window_service.py +22 -33
  266. helm/benchmark/window_services/cohere_window_service.py +1 -63
  267. helm/benchmark/window_services/default_window_service.py +2 -44
  268. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  269. helm/benchmark/window_services/ice_window_service.py +0 -34
  270. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  271. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  272. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  273. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  274. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  275. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  276. helm/benchmark/window_services/local_window_service.py +21 -4
  277. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  278. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  279. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  280. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  281. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  282. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  283. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  284. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  285. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  286. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  287. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  288. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  289. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  290. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  291. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  292. helm/benchmark/window_services/test_utils.py +3 -2
  293. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  294. helm/benchmark/window_services/window_service.py +42 -0
  295. helm/benchmark/window_services/window_service_factory.py +4 -1
  296. helm/benchmark/window_services/yalm_window_service.py +0 -27
  297. helm/clients/__init__.py +0 -0
  298. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  299. helm/clients/aleph_alpha_client.py +112 -0
  300. helm/{proxy/clients → clients}/anthropic_client.py +233 -18
  301. helm/{proxy/clients → clients}/auto_client.py +59 -31
  302. helm/clients/bedrock_client.py +128 -0
  303. helm/clients/bedrock_utils.py +72 -0
  304. helm/{proxy/clients → clients}/client.py +65 -7
  305. helm/clients/clip_score_client.py +49 -0
  306. helm/clients/clip_scorers/__init__.py +0 -0
  307. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  308. helm/clients/clip_scorers/clip_scorer.py +50 -0
  309. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  310. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  311. helm/clients/gcs_client.py +82 -0
  312. helm/{proxy/clients → clients}/google_client.py +5 -5
  313. helm/clients/google_translate_client.py +35 -0
  314. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  315. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  316. helm/clients/image_generation/__init__.py +0 -0
  317. helm/clients/image_generation/adobe_vision_client.py +78 -0
  318. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  319. helm/clients/image_generation/cogview2/__init__.py +0 -0
  320. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  321. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  322. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  323. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  324. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  325. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  326. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  327. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  328. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  329. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  330. helm/clients/image_generation/cogview2_client.py +191 -0
  331. helm/clients/image_generation/dalle2_client.py +192 -0
  332. helm/clients/image_generation/dalle3_client.py +108 -0
  333. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  334. helm/clients/image_generation/dalle_mini/data.py +442 -0
  335. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  336. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  337. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  338. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  339. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  340. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  341. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  342. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  343. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  344. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  345. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  346. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  347. helm/clients/image_generation/dalle_mini_client.py +190 -0
  348. helm/clients/image_generation/deep_floyd_client.py +78 -0
  349. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  350. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  351. helm/clients/image_generation/lexica_client.py +86 -0
  352. helm/clients/image_generation/mindalle/__init__.py +0 -0
  353. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  354. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  355. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  356. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  357. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  358. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  359. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  360. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  361. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  362. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  363. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  364. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  365. helm/clients/image_generation/mindalle_client.py +115 -0
  366. helm/clients/image_generation/nudity_check_client.py +64 -0
  367. helm/clients/image_generation/together_image_generation_client.py +111 -0
  368. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  369. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  370. helm/clients/mistral_client.py +134 -0
  371. helm/clients/moderation_api_client.py +109 -0
  372. helm/clients/open_lm_client.py +43 -0
  373. helm/clients/openai_client.py +301 -0
  374. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  375. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  376. helm/clients/simple_client.py +64 -0
  377. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  378. helm/clients/test_client.py +100 -0
  379. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  380. helm/clients/test_simple_client.py +19 -0
  381. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  382. helm/{proxy/clients → clients}/together_client.py +104 -73
  383. helm/clients/vertexai_client.py +400 -0
  384. helm/clients/vision_language/__init__.py +0 -0
  385. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  386. helm/clients/vision_language/huggingface_vlm_client.py +111 -0
  387. helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
  388. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  389. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  390. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  391. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  392. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  393. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  394. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  395. helm/clients/vision_language/open_flamingo_client.py +155 -0
  396. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  397. helm/clients/vllm_client.py +46 -0
  398. helm/common/cache.py +16 -4
  399. helm/common/cache_backend_config.py +47 -0
  400. helm/common/clip_score_request.py +41 -0
  401. helm/common/file_caches/__init__.py +0 -0
  402. helm/common/file_caches/file_cache.py +16 -0
  403. helm/common/file_caches/local_file_cache.py +61 -0
  404. helm/common/file_caches/test_local_file_cache.py +25 -0
  405. helm/common/file_upload_request.py +27 -0
  406. helm/common/general.py +1 -1
  407. helm/common/image_generation_parameters.py +25 -0
  408. helm/common/images_utils.py +33 -3
  409. helm/common/key_value_store.py +35 -4
  410. helm/common/media_object.py +13 -0
  411. helm/common/moderations_api_request.py +71 -0
  412. helm/common/mongo_key_value_store.py +3 -3
  413. helm/common/multimodal_request_utils.py +31 -0
  414. helm/common/nudity_check_request.py +29 -0
  415. helm/common/request.py +15 -17
  416. helm/common/test_general.py +6 -0
  417. helm/common/tokenization_request.py +1 -1
  418. helm/config/model_deployments.yaml +1159 -538
  419. helm/config/model_metadata.yaml +868 -41
  420. helm/config/tokenizer_configs.yaml +149 -43
  421. helm/proxy/accounts.py +31 -4
  422. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  423. helm/proxy/critique/model_critique_client.py +8 -6
  424. helm/proxy/example_queries.py +29 -17
  425. helm/proxy/server.py +70 -5
  426. helm/proxy/services/remote_service.py +31 -0
  427. helm/proxy/services/server_service.py +96 -16
  428. helm/proxy/services/service.py +30 -0
  429. helm/proxy/services/test_remote_service.py +4 -3
  430. helm/proxy/services/test_service.py +0 -12
  431. helm/proxy/test_accounts.py +32 -0
  432. helm/proxy/token_counters/auto_token_counter.py +37 -37
  433. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  434. helm/proxy/token_counters/token_counter.py +3 -5
  435. helm/tokenizers/__init__.py +0 -0
  436. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  437. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  438. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  439. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  440. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  441. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  442. helm/tokenizers/simple_tokenizer.py +33 -0
  443. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  444. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  445. helm/tokenizers/test_simple_tokenizer.py +33 -0
  446. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  447. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  448. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  449. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  450. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  451. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  452. helm/benchmark/run_specs.py +0 -2762
  453. helm/benchmark/test_model_deployment_definition.py +0 -92
  454. helm/benchmark/test_model_properties.py +0 -1570
  455. helm/benchmark/vlm_run_specs.py +0 -97
  456. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  457. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  458. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  459. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  460. helm/benchmark/window_services/t511b_window_service.py +0 -30
  461. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  462. helm/benchmark/window_services/ul2_window_service.py +0 -30
  463. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  464. helm/common/cache_utils.py +0 -14
  465. helm/proxy/clients/aleph_alpha_client.py +0 -95
  466. helm/proxy/clients/goose_ai_client.py +0 -99
  467. helm/proxy/clients/microsoft_client.py +0 -180
  468. helm/proxy/clients/openai_client.py +0 -206
  469. helm/proxy/clients/simple_client.py +0 -60
  470. helm/proxy/clients/test_client.py +0 -49
  471. helm/proxy/clients/vertexai_client.py +0 -115
  472. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  473. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  474. helm/proxy/token_counters/free_token_counter.py +0 -12
  475. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  476. helm/proxy/token_counters/openai_token_counter.py +0 -22
  477. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  478. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  479. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  480. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  481. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  482. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
  483. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  484. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  485. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  486. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  487. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  488. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  489. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  490. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  491. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  492. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  493. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  494. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  495. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  496. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  497. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  498. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  499. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
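
The headline structural change in this release is that client and tokenizer modules move out of the helm.proxy package: helm/proxy/clients/* becomes helm/clients/* and helm/proxy/tokenizers/* becomes helm/tokenizers/* (see the rename entries above). A minimal sketch of the corresponding import update for downstream code, using module paths taken from the file list; constructor signatures are not shown in this diff:

    # crfm-helm 0.4.0: client classes lived under helm.proxy.
    # from helm.proxy.clients.simple_client import SimpleClient

    # crfm-helm 0.5.1: clients are a top-level subpackage of helm.
    from helm.clients.simple_client import SimpleClient

The same pattern applies to tokenizers, e.g. helm.proxy.tokenizers.huggingface_tokenizer becomes helm.tokenizers.huggingface_tokenizer. The YAML diff below (a model deployments configuration file) reflects the same move in its class_name references.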
@@ -8,16 +8,23 @@
8
8
  # # This file defines all the model deployments that you do not want to be public.
9
9
  # model_deployments: [] # Leave empty to disable private model deployments
10
10
 
11
-
12
11
  model_deployments:
13
-
14
12
  - name: simple/model1
15
13
  model_name: simple/model1
16
- tokenizer_name: simple/model1
14
+ tokenizer_name: simple/tokenizer1
17
15
  max_sequence_length: 2048
18
16
  client_spec:
19
- class_name: "helm.proxy.clients.simple_client.SimpleClient"
20
- args: {}
17
+ class_name: "helm.clients.simple_client.SimpleClient"
18
+
19
+ # Adobe
20
+ - name: adobe/giga-gan
21
+ model_name: adobe/giga-gan
22
+ tokenizer_name: openai/clip-vit-large-patch14
23
+ max_sequence_length: 75
24
+ client_spec:
25
+ class_name: "helm.clients.image_generation.adobe_vision_client.AdobeVisionClient"
26
+ window_service_spec:
27
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
21
28
 
22
29
  # AI21 Labs
23
30
 
@@ -29,14 +36,9 @@ model_deployments:
29
36
  tokenizer_name: ai21/j1
30
37
  max_sequence_length: 2047
31
38
  client_spec:
32
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
33
- args: {}
39
+ class_name: "helm.clients.ai21_client.AI21Client"
34
40
  window_service_spec:
35
41
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
36
- args:
37
- gpt2_window_service:
38
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
39
- args: {}
40
42
 
41
43
  - name: ai21/j1-large
42
44
  deprecated: true
@@ -44,14 +46,9 @@ model_deployments:
44
46
  tokenizer_name: ai21/j1
45
47
  max_sequence_length: 2047
46
48
  client_spec:
47
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
48
- args: {}
49
+ class_name: "helm.clients.ai21_client.AI21Client"
49
50
  window_service_spec:
50
51
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
51
- args:
52
- gpt2_window_service:
53
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
54
- args: {}
55
52
 
56
53
  - name: ai21/j1-grande
57
54
  deprecated: true
@@ -59,14 +56,9 @@ model_deployments:
59
56
  tokenizer_name: ai21/j1
60
57
  max_sequence_length: 2047
61
58
  client_spec:
62
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
63
- args: {}
59
+ class_name: "helm.clients.ai21_client.AI21Client"
64
60
  window_service_spec:
65
61
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
66
- args:
67
- gpt2_window_service:
68
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
69
- args: {}
70
62
 
71
63
  - name: ai21/j1-grande-v2-beta
72
64
  deprecated: true
@@ -74,58 +66,36 @@ model_deployments:
74
66
  tokenizer_name: ai21/j1
75
67
  max_sequence_length: 2047
76
68
  client_spec:
77
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
78
- args: {}
69
+ class_name: "helm.clients.ai21_client.AI21Client"
79
70
  window_service_spec:
80
71
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
81
- args:
82
- gpt2_window_service:
83
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
84
- args: {}
85
72
 
86
73
  - name: ai21/j2-jumbo
87
74
  model_name: ai21/j2-jumbo
88
75
  tokenizer_name: ai21/j1
89
76
  max_sequence_length: 6000
90
77
  client_spec:
91
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
92
- args: {}
78
+ class_name: "helm.clients.ai21_client.AI21Client"
93
79
  window_service_spec:
94
- class_name: "helm.benchmark.window_services.wider_ai21_window_service.AI21Jurassic2JumboWindowService"
95
- args:
96
- gpt2_window_service:
97
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
98
- args: {}
80
+ class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
99
81
 
100
82
  - name: ai21/j2-large
101
83
  model_name: ai21/j2-large
102
84
  tokenizer_name: ai21/j1
103
85
  max_sequence_length: 2047
104
86
  client_spec:
105
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
106
- args: {}
87
+ class_name: "helm.clients.ai21_client.AI21Client"
107
88
  window_service_spec:
108
89
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
109
- args:
110
- gpt2_window_service:
111
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
112
- args: {}
113
90
 
114
91
  - name: ai21/j2-grande
115
92
  model_name: ai21/j2-grande
116
93
  tokenizer_name: ai21/j1
117
94
  max_sequence_length: 2047
118
95
  client_spec:
119
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
120
- args: {}
96
+ class_name: "helm.clients.ai21_client.AI21Client"
121
97
  window_service_spec:
122
98
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
123
- args:
124
- gpt2_window_service:
125
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
126
- args: {}
127
-
128
-
129
99
 
130
100
  # Aleph Alpha
131
101
  - name: AlephAlpha/luminous-base
@@ -133,29 +103,57 @@ model_deployments:
133
103
  tokenizer_name: AlephAlpha/luminous-base
134
104
  max_sequence_length: 2048
135
105
  client_spec:
136
- class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient"
137
- args: {}
106
+ class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
138
107
 
139
108
  - name: AlephAlpha/luminous-extended
140
109
  model_name: AlephAlpha/luminous-extended
141
110
  tokenizer_name: AlephAlpha/luminous-extended
142
111
  max_sequence_length: 2048
143
112
  client_spec:
144
- class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient"
145
- args: {}
113
+ class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
146
114
 
147
115
  - name: AlephAlpha/luminous-supreme
148
116
  model_name: AlephAlpha/luminous-supreme
149
117
  tokenizer_name: AlephAlpha/luminous-supreme
150
118
  max_sequence_length: 2048
151
119
  client_spec:
152
- class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient"
153
- args: {}
120
+ class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
154
121
 
155
- # TODO: Add luminous-world once it is released.
122
+ # TODO: Add luminous-world once it is released
123
+
124
+ - name: AlephAlpha/m-vader
125
+ model_name: AlephAlpha/m-vader
126
+ tokenizer_name: openai/clip-vit-large-patch14
127
+ max_sequence_length: 75
128
+ client_spec:
129
+ class_name: "helm.clients.image_generation.aleph_alpha_image_generation_client.AlephAlphaImageGenerationClient"
130
+ window_service_spec:
131
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
132
+
133
+
134
+ # Amazon
135
+ - name: amazon/titan-text-lite-v1
136
+ model_name: amazon/titan-text-lite-v1
137
+ tokenizer_name: huggingface/gpt2
138
+ max_sequence_length: 4000
139
+ client_spec:
140
+ class_name: "helm.clients.bedrock_client.BedrockTitanClient"
141
+
142
+ - name: amazon/titan-tg1-large
143
+ model_name: amazon/titan-tg1-large
144
+ tokenizer_name: huggingface/gpt2
145
+ max_sequence_length: 8000
146
+ client_spec:
147
+ class_name: "helm.clients.bedrock_client.BedrockTitanClient"
148
+
149
+ - name: amazon/titan-text-express-v1
150
+ model_name: amazon/titan-text-express-v1
151
+ tokenizer_name: huggingface/gpt2
152
+ max_sequence_length: 8000
153
+ client_spec:
154
+ class_name: "helm.clients.bedrock_client.BedrockTitanClient"
156
155
 
157
156
 
158
-
159
157
  # Anthropic
160
158
  - name: anthropic/claude-v1.3
161
159
  model_name: anthropic/claude-v1.3
@@ -163,8 +161,7 @@ model_deployments:
163
161
  max_sequence_length: 8000
164
162
  max_sequence_and_generated_tokens_length: 9016
165
163
  client_spec:
166
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
167
- args: {}
164
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
168
165
 
169
166
  - name: anthropic/claude-instant-v1
170
167
  model_name: anthropic/claude-instant-v1
@@ -172,8 +169,7 @@ model_deployments:
172
169
  max_sequence_length: 8000
173
170
  max_sequence_and_generated_tokens_length: 9016
174
171
  client_spec:
175
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
176
- args: {}
172
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
177
173
 
178
174
  - name: anthropic/claude-instant-1.2
179
175
  model_name: anthropic/claude-instant-1.2
@@ -181,8 +177,7 @@ model_deployments:
181
177
  max_sequence_length: 8000
182
178
  max_sequence_and_generated_tokens_length: 9016
183
179
  client_spec:
184
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
185
- args: {}
180
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
186
181
 
187
182
  - name: anthropic/claude-2.0
188
183
  model_name: anthropic/claude-2.0
@@ -190,8 +185,7 @@ model_deployments:
190
185
  max_sequence_length: 8000
191
186
  max_sequence_and_generated_tokens_length: 9016
192
187
  client_spec:
193
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
194
- args: {}
188
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
195
189
 
196
190
  - name: anthropic/claude-2.1
197
191
  model_name: anthropic/claude-2.1
@@ -199,8 +193,28 @@ model_deployments:
199
193
  max_sequence_length: 8000
200
194
  max_sequence_and_generated_tokens_length: 9016
201
195
  client_spec:
202
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
203
- args: {}
196
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
197
+
198
+ - name: anthropic/claude-3-sonnet-20240229
199
+ model_name: anthropic/claude-3-sonnet-20240229
200
+ tokenizer_name: anthropic/claude
201
+ max_sequence_length: 200000
202
+ client_spec:
203
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
204
+
205
+ - name: anthropic/claude-3-haiku-20240307
206
+ model_name: anthropic/claude-3-haiku-20240307
207
+ tokenizer_name: anthropic/claude
208
+ max_sequence_length: 200000
209
+ client_spec:
210
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
211
+
212
+ - name: anthropic/claude-3-opus-20240229
213
+ model_name: anthropic/claude-3-opus-20240229
214
+ tokenizer_name: anthropic/claude
215
+ max_sequence_length: 200000
216
+ client_spec:
217
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
204
218
 
205
219
  - name: anthropic/stanford-online-all-v4-s3
206
220
  deprecated: true # Closed model, not accessible via API
@@ -208,8 +222,7 @@ model_deployments:
208
222
  tokenizer_name: huggingface/gpt2
209
223
  max_sequence_length: 8192
210
224
  client_spec:
211
- class_name: "helm.proxy.clients.anthropic_client.AnthropicLegacyClient"
212
- args: {}
225
+ class_name: "helm.clients.anthropic_client.AnthropicLegacyClient"
213
226
 
214
227
  # Cohere
215
228
  - name: cohere/xlarge-20220609
@@ -218,11 +231,9 @@ model_deployments:
218
231
  max_sequence_length: 2047
219
232
  max_request_length: 2048
220
233
  client_spec:
221
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
222
- args: {}
234
+ class_name: "helm.clients.cohere_client.CohereClient"
223
235
  window_service_spec:
224
236
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
225
- args: {}
226
237
 
227
238
  - name: cohere/large-20220720
228
239
  model_name: cohere/large-20220720
@@ -230,11 +241,9 @@ model_deployments:
230
241
  max_sequence_length: 2047
231
242
  max_request_length: 2048
232
243
  client_spec:
233
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
234
- args: {}
244
+ class_name: "helm.clients.cohere_client.CohereClient"
235
245
  window_service_spec:
236
246
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
237
- args: {}
238
247
 
239
248
  - name: cohere/medium-20220720
240
249
  model_name: cohere/medium-20220720
@@ -242,11 +251,9 @@ model_deployments:
242
251
  max_sequence_length: 2047
243
252
  max_request_length: 2048
244
253
  client_spec:
245
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
246
- args: {}
254
+ class_name: "helm.clients.cohere_client.CohereClient"
247
255
  window_service_spec:
248
256
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
249
- args: {}
250
257
 
251
258
  - name: cohere/small-20220720
252
259
  model_name: cohere/small-20220720
@@ -254,11 +261,9 @@ model_deployments:
254
261
  max_sequence_length: 2047
255
262
  max_request_length: 2048
256
263
  client_spec:
257
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
258
- args: {}
264
+ class_name: "helm.clients.cohere_client.CohereClient"
259
265
  window_service_spec:
260
266
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
261
- args: {}
262
267
 
263
268
  - name: cohere/xlarge-20221108
264
269
  model_name: cohere/xlarge-20221108
@@ -266,11 +271,9 @@ model_deployments:
266
271
  max_sequence_length: 2047
267
272
  max_request_length: 2048
268
273
  client_spec:
269
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
270
- args: {}
274
+ class_name: "helm.clients.cohere_client.CohereClient"
271
275
  window_service_spec:
272
276
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
273
- args: {}
274
277
 
275
278
  - name: cohere/medium-20221108
276
279
  model_name: cohere/medium-20221108
@@ -278,11 +281,9 @@ model_deployments:
278
281
  max_sequence_length: 2047
279
282
  max_request_length: 2048
280
283
  client_spec:
281
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
282
- args: {}
284
+ class_name: "helm.clients.cohere_client.CohereClient"
283
285
  window_service_spec:
284
286
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
285
- args: {}
286
287
 
287
288
  - name: cohere/command-medium-beta
288
289
  model_name: cohere/command-medium-beta
@@ -290,11 +291,9 @@ model_deployments:
290
291
  max_sequence_length: 2019
291
292
  max_request_length: 2020
292
293
  client_spec:
293
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
294
- args: {}
294
+ class_name: "helm.clients.cohere_client.CohereClient"
295
295
  window_service_spec:
296
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
297
- args: {}
296
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
298
297
 
299
298
  - name: cohere/command-xlarge-beta
300
299
  model_name: cohere/command-xlarge-beta
@@ -302,11 +301,9 @@ model_deployments:
302
301
  max_sequence_length: 2019
303
302
  max_request_length: 2020
304
303
  client_spec:
305
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
306
- args: {}
304
+ class_name: "helm.clients.cohere_client.CohereClient"
307
305
  window_service_spec:
308
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
309
- args: {}
306
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
310
307
 
311
308
  - name: cohere/command
312
309
  model_name: cohere/command
@@ -314,11 +311,9 @@ model_deployments:
314
311
  max_sequence_length: 2019 # TODO: verify this
315
312
  max_request_length: 2020 # TODO: verify this
316
313
  client_spec:
317
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
318
- args: {}
314
+ class_name: "helm.clients.cohere_client.CohereClient"
319
315
  window_service_spec:
320
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
321
- args: {}
316
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
322
317
 
323
318
  - name: cohere/command-light
324
319
  model_name: cohere/command-light
@@ -326,38 +321,171 @@ model_deployments:
326
321
  max_sequence_length: 2019 # TODO: verify this
327
322
  max_request_length: 2020 # TODO: verify this
328
323
  client_spec:
329
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
330
- args: {}
324
+ class_name: "helm.clients.cohere_client.CohereClient"
331
325
  window_service_spec:
332
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
333
- args: {}
326
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
327
+
328
+ # Craiyon
329
+
330
+ - name: craiyon/dalle-mini
331
+ model_name: craiyon/dalle-mini
332
+ tokenizer_name: openai/clip-vit-large-patch14
333
+ max_sequence_length: 75
334
+ client_spec:
335
+ class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
336
+ window_service_spec:
337
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
338
+
339
+ - name: craiyon/dalle-mega
340
+ model_name: craiyon/dalle-mega
341
+ tokenizer_name: openai/clip-vit-large-patch14
342
+ max_sequence_length: 75
343
+ client_spec:
344
+ class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
345
+ window_service_spec:
346
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
347
+
348
+ # Databricks
349
+
350
+ - name: together/dbrx-instruct
351
+ model_name: databricks/dbrx-instruct
352
+ tokenizer_name: databricks/dbrx-instruct
353
+ max_sequence_length: 32767
354
+ client_spec:
355
+ class_name: "helm.clients.together_client.TogetherClient"
334
356
 
357
+ # DeepFloyd
335
358
 
359
+ - name: DeepFloyd/IF-I-M-v1.0
360
+ model_name: DeepFloyd/IF-I-M-v1.0
361
+ tokenizer_name: openai/clip-vit-large-patch14
362
+ max_sequence_length: 75
363
+ client_spec:
364
+ class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
365
+ window_service_spec:
366
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
367
+
368
+ - name: DeepFloyd/IF-I-L-v1.0
369
+ model_name: DeepFloyd/IF-I-L-v1.0
370
+ tokenizer_name: openai/clip-vit-large-patch14
371
+ max_sequence_length: 75
372
+ client_spec:
373
+ class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
374
+ window_service_spec:
375
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
376
+
377
+ - name: DeepFloyd/IF-I-XL-v1.0
378
+ model_name: DeepFloyd/IF-I-XL-v1.0
379
+ tokenizer_name: openai/clip-vit-large-patch14
380
+ max_sequence_length: 75
381
+ client_spec:
382
+ class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
383
+ window_service_spec:
384
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
385
+
386
+ # Deepseek
387
+
388
+ - name: together/deepseek-llm-67b-chat
389
+ model_name: deepseek-ai/deepseek-llm-67b-chat
390
+ tokenizer_name: deepseek-ai/deepseek-llm-67b-chat
391
+ max_sequence_length: 4095
392
+ client_spec:
393
+ class_name: "helm.clients.together_client.TogetherClient"
336
394
 
337
395
  # Gooseai
338
396
 
397
+ # TODO: Migrate these models to use OpenAIClient
398
+
339
399
  ## EleutherAI
340
- - name: gooseai/gpt-neo-20b
341
- model_name: eleutherai/gpt-neox-20b
342
- tokenizer_name: EleutherAI/gpt-neox-20b
343
- max_sequence_length: 2048
344
- max_request_length: 2049
400
+ # - name: gooseai/gpt-neo-20b
401
+ # model_name: eleutherai/gpt-neox-20b
402
+ # tokenizer_name: EleutherAI/gpt-neox-20b
403
+ # max_sequence_length: 2048
404
+ # max_request_length: 2049
405
+ # client_spec:
406
+ # class_name: "helm.clients.goose_ai_client.GooseAIClient"
407
+
408
+ # - name: gooseai/gpt-j-6b
409
+ # model_name: eleutherai/gpt-j-6b
410
+ # tokenizer_name: EleutherAI/gpt-j-6B
411
+ # max_sequence_length: 2048
412
+ # max_request_length: 2049
413
+ # client_spec:
414
+ # class_name: "helm.clients.goose_ai_client.GooseAIClient"
415
+
416
+ # Google
417
+ # See: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/model-versioning
418
+
419
+ ## Gemini
420
+ # See: https://ai.google.dev/models/gemini#model_variations
421
+ - name: google/gemini-pro
422
+ model_name: google/gemini-pro
423
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
424
+ max_sequence_length: 30720
425
+ max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
345
426
  client_spec:
346
- class_name: "helm.proxy.clients.goose_ai_client.GooseAIClient"
347
- args: {}
427
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
348
428
 
349
- - name: gooseai/gpt-j-6b
350
- model_name: eleutherai/gpt-j-6b
351
- tokenizer_name: EleutherAI/gpt-j-6B
352
- max_sequence_length: 2048
353
- max_request_length: 2049
429
+ - name: google/gemini-1.0-pro-001
430
+ model_name: google/gemini-1.0-pro-001
431
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
432
+ max_sequence_length: 30720
433
+ max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
434
+ client_spec:
435
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
436
+
437
+ - name: google/gemini-pro-vision
438
+ model_name: google/gemini-pro-vision
439
+ tokenizer_name: openai/cl100k_base
440
+ max_sequence_length: 12288
441
+ max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models
354
442
  client_spec:
355
- class_name: "helm.proxy.clients.goose_ai_client.GooseAIClient"
356
- args: {}
443
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
357
444
 
445
+ - name: google/gemini-1.0-pro-vision-001
446
+ model_name: google/gemini-1.0-pro-vision-001
447
+ tokenizer_name: hf-internal-testing/llama-tokenizer
448
+ max_sequence_length: 12288
449
+ max_sequence_and_generated_tokens_length: 16384
450
+ client_spec:
451
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
358
452
 
453
+ - name: google/gemini-1.5-pro-preview-0409
454
+ model_name: google/gemini-1.5-pro-preview-0409
455
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
456
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
457
+ # TODO: Max output tokens: 8192
458
+ client_spec:
459
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
359
460
 
- # Google
+ ## Gemma
+ - name: together/gemma-2b
+ model_name: google/gemma-2b
+ tokenizer_name: google/gemma-2b
+ max_sequence_length: 7167
+ client_spec:
+ class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/gemma-2b-it
+ model_name: google/gemma-2b-it
+ tokenizer_name: google/gemma-2b
+ max_sequence_length: 7167
+ client_spec:
+ class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/gemma-7b
+ model_name: google/gemma-7b
+ tokenizer_name: google/gemma-2b
+ max_sequence_length: 7167
+ client_spec:
+ class_name: "helm.clients.together_client.TogetherClient"
+
+ - name: together/gemma-7b-it
+ model_name: google/gemma-7b-it
+ tokenizer_name: google/gemma-2b
+ max_sequence_length: 7167
+ client_spec:
+ class_name: "helm.clients.together_client.TogetherClient"
 
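Every entry binds a deployment to a client implementation through client_spec.class_name, a fully qualified dotted path. A plausible sketch of how such a path can be resolved at runtime (the standard importlib pattern; resolve_class is our name, not necessarily how HELM loads it):

    import importlib

    def resolve_class(class_name: str) -> type:
        # Split "package.module.ClassName" into a module path and an attribute name.
        module_path, _, attribute = class_name.rpartition(".")
        return getattr(importlib.import_module(module_path), attribute)

    # client_cls = resolve_class("helm.clients.together_client.TogetherClient")
    # client = client_cls(**args)  # 'args' comes from the optional client_spec args mapping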
362
490
  ## PaLM 2
363
491
  - name: google/text-bison@001
@@ -366,20 +494,30 @@ model_deployments:
366
494
  max_sequence_length: 6000 # Officially 8192
367
495
  max_sequence_and_generated_tokens_length: 7000 # Officially 9216
368
496
  client_spec:
369
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
370
- args: {}
497
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
371
498
  window_service_spec:
372
499
  class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
373
500
 
501
+ - name: google/text-bison@002
+ model_name: google/text-bison@002
+ tokenizer_name: google/text-bison@002
+ max_sequence_length: 6000 # Officially 8192
+ max_sequence_and_generated_tokens_length: 9216
+ client_spec:
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
+ window_service_spec:
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
374
510
 
375
511
  - name: google/text-bison-32k
376
512
  model_name: google/text-bison-32k
377
- tokenizer_name: google/mt5-base
513
+ tokenizer_name: google/text-bison@001
378
514
  max_sequence_length: 32000
379
515
  max_sequence_and_generated_tokens_length: 32000
380
516
  client_spec:
381
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
382
- args: {}
517
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
518
+ window_service_spec:
519
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
520
+
383
521
 
384
522
  - name: google/text-unicorn@001
385
523
  model_name: google/text-unicorn@001
@@ -387,30 +525,39 @@ model_deployments:
387
525
  max_sequence_length: 6000 # Officially 8192
388
526
  max_sequence_and_generated_tokens_length: 7000 # Officially 9216
389
527
  client_spec:
390
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
391
- args: {}
528
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
392
529
  window_service_spec:
393
530
  class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
394
531
 
395
532
  - name: google/code-bison@001
396
533
  model_name: google/code-bison@001
397
- tokenizer_name: google/mt5-base
534
+ tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
398
535
  max_sequence_length: 6000 # Officially 6144
399
536
  max_sequence_and_generated_tokens_length: 7000 # Officially 7168
400
537
  client_spec:
401
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
402
- args: {}
538
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
539
+ window_service_spec:
540
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
541
+
542
+ - name: google/code-bison@002
+ model_name: google/code-bison@002
+ tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
+ max_sequence_length: 6000 # Officially 6144
+ max_sequence_and_generated_tokens_length: 7168
+ client_spec:
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
+ window_service_spec:
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
403
551
 
  - name: google/code-bison-32k
  model_name: google/code-bison-32k
- tokenizer_name: google/mt5-base
+ tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
  max_sequence_length: 32000
  max_sequence_and_generated_tokens_length: 32000
  client_spec:
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
- args: {}
-
-
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
+ window_service_spec:
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
 
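The deployments in this file follow a small recurring schema: name, model_name, tokenizer_name, one or two window limits, a client_spec, and an optional window_service_spec. A quick illustration of reading one such entry with PyYAML; the snippet mirrors the code-bison@001 entry above and only demonstrates the shape of the data:

    import yaml

    entry = yaml.safe_load("""
    - name: google/code-bison@001
      model_name: google/code-bison@001
      tokenizer_name: google/mt5-base
      max_sequence_length: 6000
      client_spec:
        class_name: "helm.clients.vertexai_client.VertexAITextClient"
    """)[0]

    print(entry["name"])                       # google/code-bison@001
    print(entry["client_spec"]["class_name"])  # helm.clients.vertexai_client.VertexAITextClient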
415
562
  # HuggingFace
416
563
 
@@ -420,26 +567,238 @@ model_deployments:
420
567
  tokenizer_name: bigcode/santacoder
421
568
  max_sequence_length: 2048
422
569
  client_spec:
423
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
424
- args: {}
570
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
425
571
 
426
572
  - name: huggingface/starcoder
427
573
  model_name: bigcode/starcoder
428
574
  tokenizer_name: bigcode/starcoder
429
575
  max_sequence_length: 8192
430
576
  client_spec:
431
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
432
- args: {}
577
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
578
+
579
+ ## Databricks
580
+ - name: huggingface/dolly-v2-3b
581
+ model_name: databricks/dolly-v2-3b
582
+ tokenizer_name: EleutherAI/gpt-neox-20b
583
+ max_sequence_length: 2048
584
+ client_spec:
585
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
586
+
587
+ - name: huggingface/dolly-v2-7b
588
+ model_name: databricks/dolly-v2-7b
589
+ tokenizer_name: EleutherAI/gpt-neox-20b
590
+ max_sequence_length: 2048
591
+ client_spec:
592
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
593
+
594
+ - name: huggingface/dolly-v2-12b
595
+ model_name: databricks/dolly-v2-12b
596
+ tokenizer_name: EleutherAI/gpt-neox-20b
597
+ max_sequence_length: 2048
598
+ client_spec:
599
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
433
600
 
434
601
  ## EleutherAI
602
+ - name: huggingface/pythia-1b-v0
603
+ model_name: eleutherai/pythia-1b-v0
604
+ tokenizer_name: EleutherAI/gpt-neox-20b
605
+ max_sequence_length: 2048
606
+ client_spec:
607
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
608
+
609
+ - name: huggingface/pythia-2.8b-v0
610
+ model_name: eleutherai/pythia-2.8b-v0
611
+ tokenizer_name: EleutherAI/gpt-neox-20b
612
+ max_sequence_length: 2048
613
+ client_spec:
614
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
615
+
616
+ - name: huggingface/pythia-6.9b
617
+ model_name: eleutherai/pythia-6.9b
618
+ tokenizer_name: EleutherAI/gpt-neox-20b
619
+ max_sequence_length: 2048
620
+ client_spec:
621
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
622
+
623
+ - name: huggingface/pythia-12b-v0
624
+ model_name: eleutherai/pythia-12b-v0
625
+ tokenizer_name: EleutherAI/gpt-neox-20b
626
+ max_sequence_length: 2048
627
+ client_spec:
628
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
629
+
435
630
  - name: huggingface/gpt-j-6b
436
631
  model_name: eleutherai/gpt-j-6b
437
632
  tokenizer_name: EleutherAI/gpt-j-6B
438
633
  max_sequence_length: 2048
439
634
  max_request_length: 2049
440
635
  client_spec:
441
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
442
- args: {}
636
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
637
+
638
+ - name: huggingface/gpt-neox-20b
639
+ model_name: eleutherai/gpt-neox-20b
640
+ tokenizer_name: EleutherAI/gpt-neox-20b
641
+ max_sequence_length: 2048
642
+ max_request_length: 2049
643
+ client_spec:
644
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
645
+
646
+ ## LMSYS
647
+ - name: huggingface/vicuna-7b-v1.3
648
+ model_name: lmsys/vicuna-7b-v1.3
649
+ tokenizer_name: hf-internal-testing/llama-tokenizer
650
+ max_sequence_length: 2048
651
+ client_spec:
652
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
653
+
654
+ - name: huggingface/vicuna-13b-v1.3
655
+ model_name: lmsys/vicuna-13b-v1.3
656
+ tokenizer_name: hf-internal-testing/llama-tokenizer
657
+ max_sequence_length: 2048
658
+ client_spec:
659
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
660
+
661
+ ## Meta
662
+ - name: huggingface/opt-175b
663
+ model_name: meta/opt-175b
664
+ tokenizer_name: facebook/opt-66b
665
+ max_sequence_length: 2048
666
+ client_spec:
667
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
668
+ args:
669
+ pretrained_model_name_or_path: facebook/opt-175b
670
+
671
+ - name: huggingface/opt-66b
672
+ model_name: meta/opt-66b
673
+ tokenizer_name: facebook/opt-66b
674
+ max_sequence_length: 2048
675
+ client_spec:
676
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
677
+ args:
678
+ pretrained_model_name_or_path: facebook/opt-66b
679
+
680
+ - name: huggingface/opt-6.7b
681
+ model_name: meta/opt-6.7b
682
+ tokenizer_name: facebook/opt-66b
683
+ max_sequence_length: 2048
684
+ client_spec:
685
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
686
+ args:
687
+ pretrained_model_name_or_path: facebook/opt-6.7b
688
+
689
+ - name: huggingface/opt-1.3b
690
+ model_name: meta/opt-1.3b
691
+ tokenizer_name: facebook/opt-66b
692
+ max_sequence_length: 2048
693
+ client_spec:
694
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
695
+ args:
696
+ pretrained_model_name_or_path: facebook/opt-1.3b
697
+
698
+ ## Microsoft
699
+ - name: huggingface/llava-1.5-7b-hf
700
+ model_name: microsoft/llava-1.5-7b-hf
701
+ tokenizer_name: hf-internal-testing/llama-tokenizer
702
+ max_sequence_length: 2048
703
+ client_spec:
704
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
705
+
706
+ - name: huggingface/llava-1.5-13b-hf
707
+ model_name: microsoft/llava-1.5-13b-hf
708
+ tokenizer_name: hf-internal-testing/llama-tokenizer
709
+ max_sequence_length: 2048
710
+ client_spec:
711
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
712
+
713
+ - name: huggingface/llava-v1.6-vicuna-7b-hf
714
+ model_name: uw-madison/llava-v1.6-vicuna-7b-hf
715
+ tokenizer_name: hf-internal-testing/llama-tokenizer
716
+ max_sequence_length: 2048
717
+ client_spec:
718
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
719
+
720
+ - name: huggingface/llava-v1.6-vicuna-13b-hf
721
+ model_name: uw-madison/llava-v1.6-vicuna-13b-hf
722
+ tokenizer_name: hf-internal-testing/llama-tokenizer
723
+ max_sequence_length: 2048
724
+ client_spec:
725
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
726
+
727
+ - name: huggingface/llava-v1.6-mistral-7b-hf
728
+ model_name: uw-madison/llava-v1.6-mistral-7b-hf
729
+ tokenizer_name: hf-internal-testing/llama-tokenizer
730
+ max_sequence_length: 2048
731
+ client_spec:
732
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
733
+
734
+ - name: huggingface/llava-v1.6-34b-hf
735
+ model_name: uw-madison/llava-v1.6-34b-hf
736
+ tokenizer_name: hf-internal-testing/llama-tokenizer
737
+ max_sequence_length: 2048
738
+ client_spec:
739
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
740
+
741
+ ## OpenFlamingo
742
+ - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
743
+ model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
744
+ tokenizer_name: anas-awadalla/mpt-7b
745
+ max_sequence_length: 2048
746
+ client_spec:
747
+ class_name: "helm.clients.vision_language.open_flamingo_client.OpenFlamingoClient"
748
+ args:
749
+ checkpoint_path: "openflamingo/OpenFlamingo-9B-vitl-mpt7b"
750
+ tokenizer_name: "anas-awadalla-2/mpt-7b"
751
+ cross_attn_every_n_layers: 4
752
+
753
+ - name: together/phi-2
754
+ model_name: microsoft/phi-2
755
+ tokenizer_name: microsoft/phi-2
756
+ max_sequence_length: 2047
757
+ client_spec:
758
+ class_name: "helm.clients.together_client.TogetherClient"
759
+
760
+ ## Mistral AI
761
+ - name: huggingface/bakLlava-v1-hf
762
+ model_name: mistralai/bakLlava-v1-hf
763
+ tokenizer_name: hf-internal-testing/llama-tokenizer
764
+ max_sequence_length: 2048
765
+ client_spec:
766
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
767
+
768
+ ## MosaicML
769
+ - name: huggingface/mpt-7b
770
+ model_name: mosaicml/mpt-7b
771
+ tokenizer_name: EleutherAI/gpt-neox-20b
772
+ max_sequence_length: 2048
773
+ client_spec:
774
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
775
+ args:
776
+ pretrained_model_name_or_path: mosaicml/mpt-7b
777
+
778
+ - name: huggingface/mpt-instruct-7b
779
+ model_name: mosaicml/mpt-instruct-7b
780
+ tokenizer_name: EleutherAI/gpt-neox-20b
781
+ max_sequence_length: 2048
782
+ client_spec:
783
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
784
+ args:
785
+ pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
786
+
787
+ - name: huggingface/mpt-30b
788
+ model_name: mosaicml/mpt-30b
789
+ tokenizer_name: EleutherAI/gpt-neox-20b
790
+ max_sequence_length: 2048
791
+ client_spec:
792
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
793
+
794
+ - name: huggingface/mpt-instruct-30b
795
+ model_name: mosaicml/mpt-instruct-30b
796
+ tokenizer_name: EleutherAI/gpt-neox-20b
797
+ max_sequence_length: 2048
798
+ client_spec:
799
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
800
+ args:
801
+ pretrained_model_name_or_path: mosaicml/mpt-30b-instruct
443
802
 
444
803
  ## OpenAI
445
804
  - name: huggingface/gpt2
@@ -448,43 +807,246 @@ model_deployments:
448
807
  max_sequence_length: 1024
449
808
  max_request_length: 1025
450
809
  client_spec:
451
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
452
- args: {}
810
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
811
+ args:
812
+ pretrained_model_name_or_path: openai-community/gpt2
813
+
814
+ ## StabilityAI
815
+ - name: huggingface/stablelm-base-alpha-3b
816
+ model_name: stabilityai/stablelm-base-alpha-3b
817
+ tokenizer_name: EleutherAI/gpt-neox-20b
818
+ max_sequence_length: 4096
819
+ client_spec:
820
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
821
+
822
+ - name: huggingface/stablelm-base-alpha-7b
823
+ model_name: stabilityai/stablelm-base-alpha-7b
824
+ tokenizer_name: EleutherAI/gpt-neox-20b
825
+ max_sequence_length: 4096
826
+ client_spec:
827
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
828
+
829
+ ## Text-to-Image Diffusion Models
830
+
831
+ - name: huggingface/dreamlike-diffusion-v1-0
832
+ model_name: huggingface/dreamlike-diffusion-v1-0
833
+ tokenizer_name: openai/clip-vit-large-patch14
834
+ max_sequence_length: 75
835
+ client_spec:
836
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
837
+ window_service_spec:
838
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
839
+
840
+ - name: huggingface/dreamlike-photoreal-v2-0
841
+ model_name: huggingface/dreamlike-photoreal-v2-0
842
+ tokenizer_name: openai/clip-vit-large-patch14
843
+ max_sequence_length: 75
844
+ client_spec:
845
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
846
+ window_service_spec:
847
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
848
+
849
+ - name: huggingface/openjourney-v1-0
850
+ model_name: huggingface/openjourney-v1-0
851
+ tokenizer_name: openai/clip-vit-large-patch14
852
+ max_sequence_length: 75
853
+ client_spec:
854
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
855
+ window_service_spec:
856
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
857
+
858
+ - name: huggingface/openjourney-v2-0
859
+ model_name: huggingface/openjourney-v2-0
860
+ tokenizer_name: openai/clip-vit-large-patch14
861
+ max_sequence_length: 75
862
+ client_spec:
863
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
864
+ window_service_spec:
865
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
866
+
867
+ - name: huggingface/redshift-diffusion
868
+ model_name: huggingface/redshift-diffusion
869
+ tokenizer_name: openai/clip-vit-large-patch14
870
+ max_sequence_length: 75
871
+ client_spec:
872
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
873
+ window_service_spec:
874
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
875
+
876
+ - name: huggingface/promptist-stable-diffusion-v1-4
877
+ model_name: huggingface/promptist-stable-diffusion-v1-4
878
+ tokenizer_name: openai/clip-vit-large-patch14
879
+ max_sequence_length: 75
880
+ client_spec:
881
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
882
+ window_service_spec:
883
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
884
+
885
+ - name: huggingface/stable-diffusion-v1-4
886
+ model_name: huggingface/stable-diffusion-v1-4
887
+ tokenizer_name: openai/clip-vit-large-patch14
888
+ max_sequence_length: 75
889
+ client_spec:
890
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
891
+ window_service_spec:
892
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
893
+
894
+ - name: huggingface/stable-diffusion-v1-5
895
+ model_name: huggingface/stable-diffusion-v1-5
896
+ tokenizer_name: openai/clip-vit-large-patch14
897
+ max_sequence_length: 75
898
+ client_spec:
899
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
900
+ window_service_spec:
901
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
902
+
903
+ - name: huggingface/stable-diffusion-v2-base
904
+ model_name: huggingface/stable-diffusion-v2-base
905
+ tokenizer_name: openai/clip-vit-large-patch14
906
+ max_sequence_length: 75
907
+ client_spec:
908
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
909
+ window_service_spec:
910
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
911
+
912
+ - name: huggingface/stable-diffusion-v2-1-base
913
+ model_name: huggingface/stable-diffusion-v2-1-base
914
+ tokenizer_name: openai/clip-vit-large-patch14
915
+ max_sequence_length: 75
916
+ client_spec:
917
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
918
+ window_service_spec:
919
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
920
+
921
+ - name: huggingface/stable-diffusion-safe-weak
922
+ model_name: huggingface/stable-diffusion-safe-weak
923
+ tokenizer_name: openai/clip-vit-large-patch14
924
+ max_sequence_length: 75
925
+ client_spec:
926
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
927
+ window_service_spec:
928
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
929
+
930
+ - name: huggingface/stable-diffusion-safe-medium
931
+ model_name: huggingface/stable-diffusion-safe-medium
932
+ tokenizer_name: openai/clip-vit-large-patch14
933
+ max_sequence_length: 75
934
+ client_spec:
935
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
936
+ window_service_spec:
937
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
938
+
939
+ - name: huggingface/stable-diffusion-safe-strong
940
+ model_name: huggingface/stable-diffusion-safe-strong
941
+ tokenizer_name: openai/clip-vit-large-patch14
942
+ max_sequence_length: 75
943
+ client_spec:
944
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
945
+ window_service_spec:
946
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
947
+
948
+ - name: huggingface/stable-diffusion-safe-max
949
+ model_name: huggingface/stable-diffusion-safe-max
950
+ tokenizer_name: openai/clip-vit-large-patch14
951
+ max_sequence_length: 75
952
+ client_spec:
953
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
954
+ window_service_spec:
955
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
956
+
957
+ - name: huggingface/vintedois-diffusion-v0-1
958
+ model_name: huggingface/vintedois-diffusion-v0-1
959
+ tokenizer_name: openai/clip-vit-large-patch14
960
+ max_sequence_length: 75
961
+ client_spec:
962
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
963
+ window_service_spec:
964
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
965
+
966
+ - name: segmind/Segmind-Vega
967
+ model_name: segmind/Segmind-Vega
968
+ tokenizer_name: openai/clip-vit-large-patch14
969
+ max_sequence_length: 75
970
+ client_spec:
971
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
972
+ window_service_spec:
973
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
974
+
975
+ - name: segmind/SSD-1B
976
+ model_name: segmind/SSD-1B
977
+ tokenizer_name: openai/clip-vit-large-patch14
978
+ max_sequence_length: 75
979
+ client_spec:
980
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
981
+ window_service_spec:
982
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
983
+
984
+ - name: stabilityai/stable-diffusion-xl-base-1.0
985
+ model_name: stabilityai/stable-diffusion-xl-base-1.0
986
+ tokenizer_name: openai/clip-vit-large-patch14
987
+ max_sequence_length: 75
988
+ client_spec:
989
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
990
+ window_service_spec:
991
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
 
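One recurring value above deserves a note: max_sequence_length: 75 for all of the text-to-image entries. These deployments tokenize prompts with openai/clip-vit-large-patch14, and CLIP's text encoder has 77 positions, two of which go to the start/end special tokens, leaving 75 for the prompt itself. This can be checked directly with the transformers library:

    from transformers import CLIPTokenizer

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
    print(tokenizer.model_max_length)  # 77 positions in CLIP's text encoder

    ids = tokenizer("a photo of a cat")["input_ids"]
    # input_ids include <|startoftext|> and <|endoftext|>, so at most
    # 77 - 2 = 75 positions remain for actual prompt tokens.
    print(len(ids))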
  # HuggingFaceM4
994
+ - name: HuggingFaceM4/idefics2-8b
995
+ model_name: HuggingFaceM4/idefics2-8b
996
+ # From https://huggingface.co/docs/transformers/main/en/model_doc/idefics2,
997
+ # "constructs a IDEFICS2 processor which wraps a LLama tokenizer."
998
+ tokenizer_name: hf-internal-testing/llama-tokenizer
999
+ max_sequence_length: 2048
1000
+ client_spec:
1001
+ class_name: "helm.clients.vision_language.huggingface_vision2seq_client.HuggingFaceVision2SeqClient"
1002
+
455
1003
  - name: HuggingFaceM4/idefics-9b
456
1004
  model_name: HuggingFaceM4/idefics-9b
457
1005
  tokenizer_name: HuggingFaceM4/idefics-9b
458
1006
  max_sequence_length: 2048
459
1007
  client_spec:
460
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
461
- args: {}
1008
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
462
1009
 
463
1010
  - name: HuggingFaceM4/idefics-9b-instruct
464
1011
  model_name: HuggingFaceM4/idefics-9b-instruct
465
1012
  tokenizer_name: HuggingFaceM4/idefics-9b-instruct
466
1013
  max_sequence_length: 2048
467
1014
  client_spec:
468
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
469
- args: {}
1015
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
470
1016
 
471
1017
  - name: HuggingFaceM4/idefics-80b
472
1018
  model_name: HuggingFaceM4/idefics-80b
473
1019
  tokenizer_name: HuggingFaceM4/idefics-80b
474
1020
  max_sequence_length: 2048
475
1021
  client_spec:
476
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
477
- args: {}
1022
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
478
1023
 
479
1024
  - name: HuggingFaceM4/idefics-80b-instruct
480
1025
  model_name: HuggingFaceM4/idefics-80b-instruct
481
1026
  tokenizer_name: HuggingFaceM4/idefics-80b-instruct
482
1027
  max_sequence_length: 2048
483
1028
  client_spec:
484
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
485
- args: {}
1029
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
486
1030
 
1031
+ # Lexica
1032
+ - name: lexica/search-stable-diffusion-1.5
1033
+ model_name: lexica/search-stable-diffusion-1.5
1034
+ tokenizer_name: openai/clip-vit-large-patch14
1035
+ max_sequence_length: 200
1036
+ client_spec:
1037
+ class_name: "helm.clients.image_generation.lexica_client.LexicaClient"
1038
+ window_service_spec:
1039
+ class_name: "helm.benchmark.window_services.image_generation.lexica_search_window_service.LexicaSearchWindowService"
487
1040
 
1041
+ # Kakao
1042
+ - name: kakaobrain/mindall-e
1043
+ model_name: kakaobrain/mindall-e
1044
+ tokenizer_name: openai/clip-vit-large-patch14
1045
+ max_sequence_length: 75
1046
+ client_spec:
1047
+ class_name: "helm.clients.image_generation.mindalle_client.MinDALLEClient"
1048
+ window_service_spec:
1049
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
488
1050
 
489
1051
  # Lightning AI
490
1052
  - name: lightningai/lit-gpt
@@ -492,44 +1054,49 @@ model_deployments:
492
1054
  tokenizer_name: lightningai/lit-gpt
493
1055
  max_sequence_length: 2048
494
1056
  client_spec:
495
- class_name: "helm.proxy.clients.lit_gpt_client.LitGPTClient"
1057
+ class_name: "helm.clients.lit_gpt_client.LitGPTClient"
496
1058
  args:
497
1059
  checkpoint_dir: "" # Path to the checkpoint directory
498
1060
  precision: bf16-true
499
1061
 
500
-
501
-
502
- # Microsoft
503
- - name: microsoft/TNLGv2_530B
504
- model_name: microsoft/TNLGv2_530B
505
- tokenizer_name: microsoft/gpt2
506
- max_sequence_length: 2047
507
- max_request_length: 2048
1062
+ # Mistral AI
1063
+ - name: mistralai/mistral-tiny
1064
+ model_name: mistralai/mistral-7b-v0.1
1065
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1066
+ max_sequence_length: 32000
508
1067
  client_spec:
509
- class_name: "helm.proxy.clients.microsoft_client.MicrosoftClient"
510
- args: {}
1068
+ class_name: "helm.clients.mistral_client.MistralAIClient"
1069
+ args:
1070
+ mistral_model: "mistral-tiny"
511
1071
 
512
- - name: microsoft/TNLGv2_7B
513
- model_name: microsoft/TNLGv2_7B
514
- tokenizer_name: microsoft/gpt2
515
- max_sequence_length: 2047
516
- max_request_length: 2048
1072
+ - name: mistralai/mistral-small-2402
1073
+ model_name: mistralai/mistral-small-2402
1074
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1075
+ max_sequence_length: 32000
517
1076
  client_spec:
518
- class_name: "helm.proxy.clients.microsoft_client.MicrosoftClient"
519
- args: {}
1077
+ class_name: "helm.clients.mistral_client.MistralAIClient"
520
1078
 
1079
+ - name: mistralai/mistral-medium-2312
1080
+ model_name: mistralai/mistral-medium-2312
1081
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1082
+ max_sequence_length: 32000
1083
+ client_spec:
1084
+ class_name: "helm.clients.mistral_client.MistralAIClient"
521
1085
 
1086
+ - name: mistralai/mistral-large-2402
1087
+ model_name: mistralai/mistral-large-2402
1088
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1089
+ max_sequence_length: 32000
1090
+ client_spec:
1091
+ class_name: "helm.clients.mistral_client.MistralAIClient"
522
1092
 
523
1093
  # Neurips
524
1094
  - name: neurips/local
525
1095
  model_name: neurips/local
526
1096
  tokenizer_name: neurips/local
527
1097
  max_sequence_length: 2048
528
- client_spec:
529
- class_name: "helm.proxy.clients.http_model_client.HTTPModelClient"
530
- args: {}
531
-
532
-
1098
+ client_spec:
1099
+ class_name: "helm.clients.http_model_client.HTTPModelClient"
533
1100
 
534
1101
  # Nvidia
535
1102
  - name: nvidia/megatron-gpt2
@@ -537,17 +1104,33 @@ model_deployments:
537
1104
  tokenizer_name: huggingface/gpt2
538
1105
  max_sequence_length: 1024
539
1106
  client_spec:
540
- class_name: "helm.proxy.clients.megatron_client.MegatronClient"
541
- args: {}
542
-
543
-
1107
+ class_name: "helm.clients.megatron_client.MegatronClient"
544
1108
 
545
1109
  # OpenAI
546
1110
 
547
1111
  ## GPT 3 Models
1112
+
1113
+ - name: openai/davinci-002
1114
+ model_name: openai/davinci-002
1115
+ tokenizer_name: openai/cl100k_base
1116
+ # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1117
+ # to provide a margin of error.
1118
+ max_sequence_length: 16000
1119
+ client_spec:
1120
+ class_name: "helm.clients.openai_client.OpenAIClient"
1121
+
1122
+ - name: openai/babbage-002
1123
+ model_name: openai/babbage-002
1124
+ tokenizer_name: openai/cl100k_base
1125
+ # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1126
+ # to provide a margin of error.
1127
+ max_sequence_length: 16000
1128
+ client_spec:
1129
+ class_name: "helm.clients.openai_client.OpenAIClient"
1130
+
548
1131
  # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
549
1132
  # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
550
-
1133
+
551
1134
  - name: openai/davinci
552
1135
  deprecated: true
553
1136
  model_name: openai/davinci
@@ -555,8 +1138,7 @@ model_deployments:
555
1138
  max_sequence_length: 2048
556
1139
  max_request_length: 2049
557
1140
  client_spec:
558
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
559
- args: {}
1141
+ class_name: "helm.clients.openai_client.OpenAIClient"
560
1142
 
561
1143
  - name: openai/curie
562
1144
  deprecated: true
@@ -565,8 +1147,7 @@ model_deployments:
565
1147
  max_sequence_length: 2048
566
1148
  max_request_length: 2049
567
1149
  client_spec:
568
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
569
- args: {}
1150
+ class_name: "helm.clients.openai_client.OpenAIClient"
570
1151
 
571
1152
  - name: openai/babbage
572
1153
  deprecated: true
@@ -575,8 +1156,7 @@ model_deployments:
575
1156
  max_sequence_length: 2048
576
1157
  max_request_length: 2049
577
1158
  client_spec:
578
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
579
- args: {}
1159
+ class_name: "helm.clients.openai_client.OpenAIClient"
580
1160
 
581
1161
  - name: openai/ada
582
1162
  deprecated: true
@@ -585,8 +1165,7 @@ model_deployments:
585
1165
  max_sequence_length: 2048
586
1166
  max_request_length: 2049
587
1167
  client_spec:
588
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
589
- args: {}
1168
+ class_name: "helm.clients.openai_client.OpenAIClient"
590
1169
 
591
1170
  - name: openai/text-davinci-003
592
1171
  deprecated: true
@@ -595,8 +1174,7 @@ model_deployments:
595
1174
  max_sequence_length: 4000
596
1175
  max_request_length: 4001
597
1176
  client_spec:
598
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
599
- args: {}
1177
+ class_name: "helm.clients.openai_client.OpenAIClient"
600
1178
 
601
1179
  - name: openai/text-davinci-002
602
1180
  deprecated: true
@@ -605,8 +1183,7 @@ model_deployments:
605
1183
  max_sequence_length: 4000
606
1184
  max_request_length: 4001
607
1185
  client_spec:
608
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
609
- args: {}
1186
+ class_name: "helm.clients.openai_client.OpenAIClient"
610
1187
 
611
1188
  - name: openai/text-davinci-001
612
1189
  deprecated: true
@@ -615,8 +1192,7 @@ model_deployments:
615
1192
  max_sequence_length: 2048
616
1193
  max_request_length: 2049
617
1194
  client_spec:
618
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
619
- args: {}
1195
+ class_name: "helm.clients.openai_client.OpenAIClient"
620
1196
 
621
1197
  - name: openai/text-curie-001
622
1198
  deprecated: true
@@ -625,8 +1201,7 @@ model_deployments:
625
1201
  max_sequence_length: 2048
626
1202
  max_request_length: 2049
627
1203
  client_spec:
628
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
629
- args: {}
1204
+ class_name: "helm.clients.openai_client.OpenAIClient"
630
1205
 
631
1206
  - name: openai/text-babbage-001
632
1207
  deprecated: true
@@ -635,8 +1210,7 @@ model_deployments:
635
1210
  max_sequence_length: 2048
636
1211
  max_request_length: 2049
637
1212
  client_spec:
638
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
639
- args: {}
1213
+ class_name: "helm.clients.openai_client.OpenAIClient"
640
1214
 
641
1215
  - name: openai/text-ada-001
642
1216
  deprecated: true
@@ -645,13 +1219,19 @@ model_deployments:
645
1219
  max_sequence_length: 2048
646
1220
  max_request_length: 2049
647
1221
  client_spec:
648
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
649
- args: {}
650
-
1222
+ class_name: "helm.clients.openai_client.OpenAIClient"
 
  ## GPT 3.5 Turbo Models
  # ChatGPT: https://openai.com/blog/chatgpt
 
+ - name: openai/gpt-3.5-turbo-instruct
+ model_name: openai/gpt-3.5-turbo-instruct
+ tokenizer_name: openai/cl100k_base
+ max_sequence_length: 4096
+ max_request_length: 4097
+ client_spec:
+ class_name: "helm.clients.openai_client.OpenAIClient"
+
  # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
  # sequence length is smaller at 4087 with one user input message and one assistant
  # output message because ChatGPT uses special tokens for message roles and boundaries.
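In other words, chat-format overhead, not the model weights, sets the practical ceiling here. A short worked version of that bookkeeping (plain arithmetic, not HELM code):

    CLAIMED_WINDOW = 4096
    EMPIRICAL_WINDOW = 4087   # measured with one user and one assistant message
    overhead = CLAIMED_WINDOW - EMPIRICAL_WINDOW
    print(overhead)           # 9 tokens lost to role and boundary tokens in that setup
    # max_sequence_length is set to 4000 below to leave extra headroom beyond the measured 4087.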
@@ -662,8 +1242,7 @@ model_deployments:
662
1242
  max_sequence_length: 4000
663
1243
  max_request_length: 4001
664
1244
  client_spec:
665
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
666
- args: {}
1245
+ class_name: "helm.clients.openai_client.OpenAIClient"
667
1246
 
668
1247
  # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
669
1248
  # sequence length is smaller at 4087 with one user input message and one assistant
@@ -675,8 +1254,7 @@ model_deployments:
675
1254
  max_sequence_length: 4000
676
1255
  max_request_length: 4001
677
1256
  client_spec:
678
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
679
- args: {}
1257
+ class_name: "helm.clients.openai_client.OpenAIClient"
680
1258
 
681
1259
  # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
682
1260
  # in the openai/gpt-3.5-turbo-0613 comment
@@ -686,9 +1264,26 @@ model_deployments:
686
1264
  max_sequence_length: 16000
687
1265
  max_request_length: 16001
688
1266
  client_spec:
689
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
690
- args: {}
1267
+ class_name: "helm.clients.openai_client.OpenAIClient"
1268
+
1269
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
1270
+ # in the openai/gpt-3.5-turbo-0613 comment
1271
+ - name: openai/gpt-3.5-turbo-1106
1272
+ model_name: openai/gpt-3.5-turbo-1106
1273
+ tokenizer_name: openai/cl100k_base
1274
+ max_sequence_length: 16000
1275
+ max_request_length: 16001
1276
+ client_spec:
1277
+ class_name: "helm.clients.openai_client.OpenAIClient"
691
1278
 
1279
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
1280
+ # in the openai/gpt-3.5-turbo-0613 comment
1281
+ - name: openai/gpt-3.5-turbo-0125
1282
+ model_name: openai/gpt-3.5-turbo-0125
1283
+ tokenizer_name: openai/cl100k_base
1284
+ max_sequence_length: 16000
1285
+ client_spec:
1286
+ class_name: "helm.clients.openai_client.OpenAIClient"
692
1287
 
693
1288
  ## GPT 4 Models
694
1289
 
@@ -701,8 +1296,7 @@ model_deployments:
701
1296
  max_sequence_length: 128000
702
1297
  max_request_length: 128001
703
1298
  client_spec:
704
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
705
- args: {}
1299
+ class_name: "helm.clients.openai_client.OpenAIClient"
706
1300
 
707
1301
  - name: openai/gpt-4-0314
708
1302
  model_name: openai/gpt-4-0314
@@ -710,8 +1304,7 @@ model_deployments:
710
1304
  max_sequence_length: 8192
711
1305
  max_request_length: 8193
712
1306
  client_spec:
713
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
714
- args: {}
1307
+ class_name: "helm.clients.openai_client.OpenAIClient"
715
1308
 
716
1309
  - name: openai/gpt-4-32k-0314
717
1310
  model_name: openai/gpt-4-32k-0314
@@ -719,8 +1312,7 @@ model_deployments:
719
1312
  max_sequence_length: 32768
720
1313
  max_request_length: 32769
721
1314
  client_spec:
722
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
723
- args: {}
1315
+ class_name: "helm.clients.openai_client.OpenAIClient"
724
1316
 
725
1317
  - name: openai/gpt-4-0613
726
1318
  model_name: openai/gpt-4-0613
@@ -728,8 +1320,7 @@ model_deployments:
728
1320
  max_sequence_length: 8192
729
1321
  max_request_length: 8193
730
1322
  client_spec:
731
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
732
- args: {}
1323
+ class_name: "helm.clients.openai_client.OpenAIClient"
733
1324
 
734
1325
  - name: openai/gpt-4-32k-0613
735
1326
  model_name: openai/gpt-4-32k-0613
@@ -737,9 +1328,43 @@ model_deployments:
737
1328
  max_sequence_length: 32768
738
1329
  max_request_length: 32769
739
1330
  client_spec:
740
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
741
- args: {}
1331
+ class_name: "helm.clients.openai_client.OpenAIClient"
1332
+
1333
+ - name: openai/gpt-4-0125-preview
1334
+ model_name: openai/gpt-4-0125-preview
1335
+ tokenizer_name: openai/cl100k_base
1336
+ # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo,
1337
+ # the maximum number of output tokens for this model is 4096
1338
+ # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098
1339
+ max_sequence_length: 128000
1340
+ max_request_length: 128001
1341
+ client_spec:
1342
+ class_name: "helm.clients.openai_client.OpenAIClient"
1343
+
1344
+ - name: openai/gpt-4-turbo-2024-04-09
1345
+ model_name: openai/gpt-4-turbo-2024-04-09
1346
+ tokenizer_name: openai/cl100k_base
1347
+ max_sequence_length: 128000
1348
+ client_spec:
1349
+ class_name: "helm.clients.openai_client.OpenAIClient"
1350
+
1351
+ - name: openai/gpt-4-vision-preview
1352
+ model_name: openai/gpt-4-vision-preview
1353
+ tokenizer_name: openai/cl100k_base
1354
+ max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
1355
+ max_request_length: 128001
1356
+ max_sequence_and_generated_tokens_length: 132096
1357
+ client_spec:
1358
+ class_name: "helm.clients.openai_client.OpenAIClient"
742
1359
 
1360
+ - name: openai/gpt-4-1106-vision-preview
1361
+ model_name: openai/gpt-4-1106-vision-preview
1362
+ tokenizer_name: openai/cl100k_base
1363
+ max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
1364
+ max_request_length: 128001
1365
+ max_sequence_and_generated_tokens_length: 132096
1366
+ client_spec:
1367
+ class_name: "helm.clients.openai_client.OpenAIClient"
743
1368
 
744
1369
  ## Codex Models
745
1370
  # DEPRECATED: Codex models have been shut down on March 23 2023.
@@ -751,8 +1376,7 @@ model_deployments:
751
1376
  max_sequence_length: 4000
752
1377
  max_request_length: 4001
753
1378
  client_spec:
754
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
755
- args: {}
1379
+ class_name: "helm.clients.openai_client.OpenAIClient"
756
1380
 
757
1381
  - name: openai/code-davinci-001
758
1382
  deprecated: true
@@ -761,8 +1385,7 @@ model_deployments:
761
1385
  max_sequence_length: 2048
762
1386
  max_request_length: 2049
763
1387
  client_spec:
764
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
765
- args: {}
1388
+ class_name: "helm.clients.openai_client.OpenAIClient"
766
1389
 
767
1390
  - name: openai/code-cushman-001
768
1391
  deprecated: true
@@ -771,10 +1394,8 @@ model_deployments:
771
1394
  max_sequence_length: 2048
772
1395
  max_request_length: 2049
773
1396
  client_spec:
774
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
775
- args: {}
1397
+ class_name: "helm.clients.openai_client.OpenAIClient"
776
1398
 
777
-
778
1399
  ## Text Similarity Models
779
1400
  # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
780
1401
  # The number of parameters is guessed based on the number of parameters of the
@@ -789,8 +1410,7 @@ model_deployments:
789
1410
  max_sequence_length: 2048
790
1411
  max_request_length: 2049
791
1412
  client_spec:
792
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
793
- args: {}
1413
+ class_name: "helm.clients.openai_client.OpenAIClient"
794
1414
 
795
1415
  - name: openai/text-similarity-curie-001
796
1416
  deprecated: true
@@ -799,8 +1419,7 @@ model_deployments:
799
1419
  max_sequence_length: 2048
800
1420
  max_request_length: 2049
801
1421
  client_spec:
802
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
803
- args: {}
1422
+ class_name: "helm.clients.openai_client.OpenAIClient"
804
1423
 
805
1424
  - name: openai/text-similarity-babbage-001
806
1425
  deprecated: true
@@ -809,8 +1428,7 @@ model_deployments:
809
1428
  max_sequence_length: 2048
810
1429
  max_request_length: 2049
811
1430
  client_spec:
812
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
813
- args: {}
1431
+ class_name: "helm.clients.openai_client.OpenAIClient"
814
1432
 
815
1433
  - name: openai/text-similarity-ada-001
816
1434
  deprecated: true
@@ -819,8 +1437,7 @@ model_deployments:
819
1437
  max_sequence_length: 2048
820
1438
  max_request_length: 2049
821
1439
  client_spec:
822
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
823
- args: {}
1440
+ class_name: "helm.clients.openai_client.OpenAIClient"
824
1441
 
825
1442
  # As of 2023-11-07, text-embedding-ada-002 is not deprecated:
826
1443
  # "We recommend using text-embedding-ada-002 for nearly all use cases."
@@ -831,10 +1448,53 @@ model_deployments:
831
1448
  max_sequence_length: 2048
832
1449
  max_request_length: 2049
833
1450
  client_spec:
834
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
835
- args: {}
1451
+ class_name: "helm.clients.openai_client.OpenAIClient"
1452
+
1453
+ # Text-to-image models
1454
+ - name: openai/dall-e-2
1455
+ model_name: openai/dall-e-2
1456
+ tokenizer_name: openai/clip-vit-large-patch14
1457
+ max_sequence_length: 1000
1458
+ client_spec:
1459
+ class_name: "helm.clients.image_generation.dalle2_client.DALLE2Client"
1460
+ window_service_spec:
1461
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
1462
+
1463
+ - name: openai/dall-e-3
1464
+ model_name: openai/dall-e-3
1465
+ tokenizer_name: openai/clip-vit-large-patch14
1466
+ max_sequence_length: 1000
1467
+ client_spec:
1468
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1469
+ window_service_spec:
1470
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
1471
+
1472
+ - name: openai/dall-e-3-natural
1473
+ model_name: openai/dall-e-3-natural
1474
+ tokenizer_name: openai/clip-vit-large-patch14
1475
+ max_sequence_length: 1000
1476
+ client_spec:
1477
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1478
+ window_service_spec:
1479
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
836
1480
 
1481
+ - name: openai/dall-e-3-hd
1482
+ model_name: openai/dall-e-3-hd
1483
+ tokenizer_name: openai/clip-vit-large-patch14
1484
+ max_sequence_length: 1000
1485
+ client_spec:
1486
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1487
+ window_service_spec:
1488
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
837
1489
 
1490
+ - name: openai/dall-e-3-hd-natural
1491
+ model_name: openai/dall-e-3-hd-natural
1492
+ tokenizer_name: openai/clip-vit-large-patch14
1493
+ max_sequence_length: 1000
1494
+ client_spec:
1495
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1496
+ window_service_spec:
1497
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
838
1498
 
839
1499
  # Together
840
1500
  # The list of models served by Together changes often, to check the latest list, visit:
@@ -844,275 +1504,154 @@ model_deployments:
844
1504
 
845
1505
  ## BigScience
846
1506
  - name: together/bloom
847
- deprecated: true # Removed from together
1507
+ deprecated: true # Removed from Together
848
1508
  model_name: bigscience/bloom
849
1509
  tokenizer_name: bigscience/bloom
850
1510
  max_sequence_length: 2048
851
1511
  max_request_length: 2049
852
1512
  client_spec:
853
- class_name: "helm.proxy.clients.together_client.TogetherClient"
854
- args: {}
1513
+ class_name: "helm.clients.together_client.TogetherClient"
855
1514
 
856
1515
  - name: together/t0pp
857
- deprecated: true # Removed from together
1516
+ deprecated: true # Removed from Together
858
1517
  model_name: bigscience/t0pp
859
1518
  tokenizer_name: bigscience/T0pp
860
1519
  max_sequence_length: 1024
861
1520
  client_spec:
862
- class_name: "helm.proxy.clients.together_client.TogetherClient"
863
- args: {}
1521
+ class_name: "helm.clients.together_client.TogetherClient"
864
1522
  window_service_spec:
865
- class_name: "helm.benchmark.window_services.t0pp_window_service.T0ppWindowService"
866
- args: {}
867
-
868
- ## Databricks
869
- - name: together/dolly-v2-3b
870
- model_name: databricks/dolly-v2-3b
871
- tokenizer_name: EleutherAI/gpt-neox-20b
872
- max_sequence_length: 2048
873
- max_request_length: 2049
874
- client_spec:
875
- class_name: "helm.proxy.clients.together_client.TogetherClient"
876
- args: {}
877
-
878
- - name: together/dolly-v2-7b
879
- model_name: databricks/dolly-v2-7b
880
- tokenizer_name: EleutherAI/gpt-neox-20b
881
- max_sequence_length: 2048
882
- max_request_length: 2049
883
- client_spec:
884
- class_name: "helm.proxy.clients.together_client.TogetherClient"
885
- args: {}
886
-
887
- - name: together/dolly-v2-12b
888
- model_name: databricks/dolly-v2-12b
889
- tokenizer_name: EleutherAI/gpt-neox-20b
890
- max_sequence_length: 2048
891
- max_request_length: 2049
892
- client_spec:
893
- class_name: "helm.proxy.clients.together_client.TogetherClient"
894
- args: {}
895
-
896
- ## EleutherAI
897
- - name: together/gpt-j-6b
898
- deprecated: true # Removed from together
899
- model_name: eleutherai/gpt-j-6b
900
- tokenizer_name: EleutherAI/gpt-j-6B
901
- max_sequence_length: 2048
902
- max_request_length: 2049
903
- client_spec:
904
- class_name: "helm.proxy.clients.together_client.TogetherClient"
905
- args: {}
906
-
907
- - name: together/gpt-neox-20b
908
- deprecated: true # Removed from together
909
- model_name: eleutherai/gpt-neox-20b
910
- tokenizer_name: EleutherAI/gpt-neox-20b
911
- max_sequence_length: 2048
912
- max_request_length: 2049
913
- client_spec:
914
- class_name: "helm.proxy.clients.together_client.TogetherClient"
915
- args: {}
916
-
917
- - name: together/pythia-1b-v0
918
- model_name: eleutherai/pythia-1b-v0
919
- tokenizer_name: EleutherAI/gpt-neox-20b
920
- max_sequence_length: 2048
921
- max_request_length: 2049
922
- client_spec:
923
- class_name: "helm.proxy.clients.together_client.TogetherClient"
924
- args: {}
925
-
926
- - name: together/pythia-2.8b-v0
927
- model_name: eleutherai/pythia-2.8b-v0
928
- tokenizer_name: EleutherAI/gpt-neox-20b
929
- max_sequence_length: 2048
930
- max_request_length: 2049
931
- client_spec:
932
- class_name: "helm.proxy.clients.together_client.TogetherClient"
933
- args: {}
934
-
935
- - name: together/pythia-6.9b
936
- model_name: eleutherai/pythia-6.9b
937
- tokenizer_name: EleutherAI/gpt-neox-20b
938
- max_sequence_length: 2048
939
- max_request_length: 2049
940
- client_spec:
941
- class_name: "helm.proxy.clients.together_client.TogetherClient"
942
- args: {}
943
-
944
- - name: together/pythia-12b-v0
945
- model_name: eleutherai/pythia-12b-v0
946
- tokenizer_name: EleutherAI/gpt-neox-20b
947
- max_sequence_length: 2048
948
- max_request_length: 2049
949
- client_spec:
950
- class_name: "helm.proxy.clients.together_client.TogetherClient"
951
- args: {}
1523
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
952
1524
 
953
1525
  ## Google
954
1526
  - name: together/t5-11b
955
- deprecated: true # Removed from together
1527
+ deprecated: true # Removed from Together
956
1528
  model_name: google/t5-11b
957
1529
  tokenizer_name: google/t5-11b
958
1530
  max_sequence_length: 511
959
1531
  client_spec:
960
- class_name: "helm.proxy.clients.together_client.TogetherClient"
961
- args: {}
1532
+ class_name: "helm.clients.together_client.TogetherClient"
962
1533
  window_service_spec:
963
- class_name: "helm.benchmark.window_services.t511b_window_service.T511bWindowService"
964
- args: {}
1534
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
965
1535
 
966
1536
  - name: together/flan-t5-xxl
967
- deprecated: true # Removed from together
1537
+ deprecated: true # Removed from Together
968
1538
  model_name: google/flan-t5-xxl
969
1539
  tokenizer_name: google/flan-t5-xxl
970
1540
  max_sequence_length: 511
971
1541
  client_spec:
972
- class_name: "helm.proxy.clients.together_client.TogetherClient"
973
- args: {}
1542
+ class_name: "helm.clients.together_client.TogetherClient"
974
1543
  window_service_spec:
975
- class_name: "helm.benchmark.window_services.flan_t5_window_service.FlanT5WindowService"
976
- args: {}
1544
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
977
1545
 
978
1546
  - name: together/ul2
979
- deprecated: true # Removed from together
1547
+ deprecated: true # Removed from Together
980
1548
  model_name: google/ul2
981
1549
  tokenizer_name: google/ul2
982
1550
  max_sequence_length: 511
983
1551
  client_spec:
984
- class_name: "helm.proxy.clients.together_client.TogetherClient"
985
- args: {}
1552
+ class_name: "helm.clients.together_client.TogetherClient"
986
1553
  window_service_spec:
987
- class_name: "helm.benchmark.window_services.ul2_window_service.UL2WindowService"
988
- args: {}
989
-
990
- ## HazyResearch
991
- - name: together/h3-2.7b
992
- deprecated: true # Not available on Together yet
993
- model_name: hazyresearch/h3-2.7b
994
- tokenizer_name: huggingface/gpt2
995
- max_sequence_length: 1024
996
- max_request_length: 1025
997
- client_spec:
998
- class_name: "helm.proxy.clients.together_client.TogetherClient"
999
- args: {}
1000
-
1001
- ## LMSYS
1002
- # TODO: might be deprecated. Needs to be checked.
1003
- # Together officialy supports vicuna 1.5, not sure if 1.3 is still supported.
1004
- - name: together/vicuna-7b-v1.3
1005
- model_name: lmsys/vicuna-7b-v1.3
1006
- tokenizer_name: hf-internal-testing/llama-tokenizer
1007
- max_sequence_length: 2048
1008
- client_spec:
1009
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1010
- args: {}
1011
-
1012
- - name: together/vicuna-13b-v1.3
1013
- model_name: lmsys/vicuna-13b-v1.3
1014
- tokenizer_name: hf-internal-testing/llama-tokenizer
1015
- max_sequence_length: 2048
1016
- client_spec:
1017
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1018
- args: {}
1554
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
1019
1555
 
1020
1556
  ## Meta
1021
1557
  - name: together/llama-7b
1022
1558
  model_name: meta/llama-7b
1023
1559
  tokenizer_name: hf-internal-testing/llama-tokenizer
1024
- max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+ max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
1025
1561
  client_spec:
1026
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1027
- args: {}
1562
+ class_name: "helm.clients.together_client.TogetherClient"
1563
+ args:
1564
+ together_model: huggyllama/llama-7b
1028
1565
 
1029
1566
  - name: together/llama-13b
1030
1567
  model_name: meta/llama-13b
1031
1568
  tokenizer_name: hf-internal-testing/llama-tokenizer
1032
- max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+ max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
1033
1570
  client_spec:
1034
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1035
- args: {}
1571
+ class_name: "helm.clients.together_client.TogetherClient"
1572
+ args:
1573
+ together_model: huggyllama/llama-13b
1036
1574
 
1037
1575
  - name: together/llama-30b
1038
1576
  model_name: meta/llama-30b
1039
1577
  tokenizer_name: hf-internal-testing/llama-tokenizer
1040
- max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+ max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
1041
1579
  client_spec:
1042
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1043
- args: {}
1580
+ class_name: "helm.clients.together_client.TogetherClient"
1581
+ args:
1582
+ together_model: huggyllama/llama-30b
1044
1583
 
1045
1584
  - name: together/llama-65b
1046
1585
  model_name: meta/llama-65b
1047
1586
  tokenizer_name: hf-internal-testing/llama-tokenizer
1048
- max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+ max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
1049
1588
  client_spec:
1050
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1051
- args: {}
1589
+ class_name: "helm.clients.together_client.TogetherClient"
1590
+ args:
1591
+ together_model: huggyllama/llama-65b
1052
1592
 
1053
1593
  - name: together/llama-2-7b
1054
1594
  model_name: meta/llama-2-7b
1055
1595
  tokenizer_name: meta-llama/Llama-2-7b-hf
1056
- max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
+ max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
1057
1597
  client_spec:
1058
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1059
- args: {}
1598
+ class_name: "helm.clients.together_client.TogetherClient"
1599
+ args:
1600
+ together_model: togethercomputer/llama-2-7b
1060
1601
 
1061
1602
  - name: together/llama-2-13b
1062
1603
  model_name: meta/llama-2-13b
1063
1604
  tokenizer_name: meta-llama/Llama-2-7b-hf
1064
- max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
+ max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
1065
1606
  client_spec:
1066
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1067
- args: {}
1607
+ class_name: "helm.clients.together_client.TogetherClient"
1608
+ args:
1609
+ together_model: togethercomputer/llama-2-13b
1068
1610
 
1069
1611
  - name: together/llama-2-70b
1070
1612
  model_name: meta/llama-2-70b
1071
1613
  tokenizer_name: meta-llama/Llama-2-7b-hf
1072
- max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
+ max_sequence_length: 4094 # Subtract 2 tokens to work around an off-by-two bug in Together's token counting (#2080 and #2094)
1073
1615
  client_spec:
1074
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1075
- args: {}
1616
+ class_name: "helm.clients.together_client.TogetherClient"
1617
+ args:
1618
+ together_model: togethercomputer/llama-2-70b
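The comments in the LLaMA entries above all apply the same defensive pattern: shave a small fixed margin off the advertised context window so Together's token accounting cannot reject a maximal request. A one-line sketch (the helper name is ours, not HELM's):

    def usable_window(advertised: int, margin: int) -> int:
        # The margin absorbs the off-by-one / off-by-two discrepancy in the provider's count.
        return advertised - margin

    assert usable_window(2048, 1) == 2047  # LLaMA: off-by-one in input validation (#2080)
    assert usable_window(4096, 2) == 4094  # Llama 2: off-by-two in token counting (#2080, #2094)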
1076
1619
 
-  - name: together/opt-175b
-    deprecated: true # Not available on Together yet
-    model_name: meta/opt-175b
-    tokenizer_name: facebook/opt-66b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/llama-3-8b
+    model_name: meta/llama-3-8b
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-8B
 
-  - name: together/opt-66b
-    deprecated: true # Not available on Together yet
-    model_name: meta/opt-66b
-    tokenizer_name: facebook/opt-66b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/llama-3-70b
+    model_name: meta/llama-3-70b
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-70B
 
-  - name: together/opt-6.7b
-    deprecated: true # Not available on Together yet
-    model_name: meta/opt-6.7b
-    tokenizer_name: facebook/opt-66b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/llama-3-8b-chat
+    model_name: meta/llama-3-8b-chat
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-8B
 
-  - name: together/opt-1.3b
-    deprecated: true # Not available on Together yet
-    model_name: meta/opt-1.3b
-    tokenizer_name: facebook/opt-66b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/llama-3-70b-chat
+    model_name: meta/llama-3-70b-chat
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-70B
 
   # 01.AI
   - name: together/yi-6b
@@ -1120,93 +1659,109 @@ model_deployments:
     tokenizer_name: 01-ai/Yi-6B
     max_sequence_length: 4095
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-6B
 
   - name: together/yi-34b
     model_name: 01-ai/yi-34b
     tokenizer_name: 01-ai/Yi-6B
     max_sequence_length: 4095
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-34B
+
+  - name: together/yi-6b-chat
+    model_name: 01-ai/yi-6b-chat
+    tokenizer_name: 01-ai/Yi-6B
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-6B
+
+  - name: together/yi-34b-chat
+    model_name: 01-ai/yi-34b-chat
+    tokenizer_name: 01-ai/Yi-6B
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-34B
+
+
+  # Allen Institute for AI
+  - name: together/olmo-7b
+    model_name: allenai/olmo-7b
+    tokenizer_name: allenai/olmo-7b
+    max_sequence_length: 2047
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/olmo-7b-twin-2t
+    model_name: allenai/olmo-7b-twin-2t
+    tokenizer_name: allenai/olmo-7b
+    max_sequence_length: 2047
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+  - name: together/olmo-7b-instruct
+    model_name: allenai/olmo-7b-instruct
+    tokenizer_name: allenai/olmo-7b
+    max_sequence_length: 2047
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
 
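Note that the new OLMo entries carry no args at all, while most Together entries now pin an explicit together_model. A small sketch that loads a deployments file of this shape and reports which Together-backed entries rely on the default mapping (PyYAML assumed; that the client falls back to model_name when no override is given is an assumption, not something this diff confirms):

    import yaml  # third-party: PyYAML

    def report_together_models(path: str) -> None:
        """Print each Together deployment and the model string it would send upstream."""
        with open(path) as f:
            deployments = yaml.safe_load(f)["model_deployments"]
        for d in deployments:
            spec = d.get("client_spec") or {}
            if "together_client" not in spec.get("class_name", ""):
                continue
            override = (spec.get("args") or {}).get("together_model")
            # No explicit override: assume a fallback to model_name (unverified here).
            print(d["name"], "->", override or f"default ({d['model_name']})")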
   ## MistralAI
   - name: together/mistral-7b-v0.1
     model_name: mistralai/mistral-7b-v0.1
     tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+    max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: mistralai/Mistral-7B-v0.1
 
   - name: together/mixtral-8x7b-32kseqlen
     model_name: mistralai/mixtral-8x7b-32kseqlen
     tokenizer_name: mistralai/Mistral-7B-v0.1
-    max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
-    client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
-
-  ## MosaicML
-  - name: together/mpt-7b
-    deprecated: true # Not available on Together yet
-    model_name: mosaicml/mpt-7b
-    tokenizer_name: EleutherAI/gpt-neox-20b
-    max_sequence_length: 2048
-    max_request_length: 2049
+    max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: mistralai/mixtral-8x7b-32kseqlen
 
-  - name: together/mpt-instruct-7b
-    deprecated: true # Not available on Together yet
-    model_name: mosaicml/mpt-instruct-7b
-    tokenizer_name: EleutherAI/gpt-neox-20b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/mixtral-8x7b-instruct-v0.1
+    model_name: mistralai/mixtral-8x7b-instruct-v0.1
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 4095 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
 
-  - name: together/mpt-30b
-    model_name: mosaicml/mpt-30b
-    tokenizer_name: EleutherAI/gpt-neox-20b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/mixtral-8x22b
+    model_name: mistralai/mixtral-8x22b
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 65535
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
 
-  - name: together/mpt-instruct-30b
-    model_name: mosaicml/mpt-instruct-30b
-    tokenizer_name: EleutherAI/gpt-neox-20b
-    max_sequence_length: 2048
-    max_request_length: 2049
+  - name: together/mixtral-8x22b-instruct-v0.1
+    model_name: mistralai/mixtral-8x22b-instruct-v0.1
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 65535
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
 
-  ## StabilityAI
-  - name: together/stablelm-base-alpha-3b
-    deprecated: true # Removed from Together
-    model_name: stabilityai/stablelm-base-alpha-3b
-    tokenizer_name: EleutherAI/gpt-neox-20b
-    max_sequence_length: 4096
-    max_request_length: 4097
-    client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
 
-  - name: together/stablelm-base-alpha-7b
-    deprecated: true # Removed from Together
-    model_name: stabilityai/stablelm-base-alpha-7b
-    tokenizer_name: EleutherAI/gpt-neox-20b
-    max_sequence_length: 4096
-    max_request_length: 4097
+  ## Snowflake
+  - name: together/snowflake-arctic-instruct
+    model_name: snowflake/snowflake-arctic-instruct
+    tokenizer_name: snowflake/snowflake-arctic-instruct
+    max_sequence_length: 4000 # Lower than 4096 because of chat tokens
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherChatClient"
 
   ## Stanford
   - name: together/alpaca-7b
@@ -1214,41 +1769,46 @@ model_deployments:
     tokenizer_name: hf-internal-testing/llama-tokenizer
     max_sequence_length: 2048
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/alpaca-7b
 
   ## Tiiuae
   - name: together/falcon-7b
     model_name: tiiuae/falcon-7b
     tokenizer_name: tiiuae/falcon-7b
-    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/falcon-7b
 
   - name: together/falcon-7b-instruct
     model_name: tiiuae/falcon-7b-instruct
     tokenizer_name: tiiuae/falcon-7b
-    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/falcon-7b-instruct
 
   - name: together/falcon-40b
     model_name: tiiuae/falcon-40b
     tokenizer_name: tiiuae/falcon-7b
-    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/falcon-40b
 
   - name: together/falcon-40b-instruct
     model_name: tiiuae/falcon-40b-instruct
     tokenizer_name: tiiuae/falcon-7b
-    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
+    max_sequence_length: 2047 # Subtract 1 token to work around an off-by-one bug in Together's input validation token counting (#2080)
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/falcon-40b-instruct
 
   ## Together
   # These are models fine-tuned by Together (and not simply hosted by Together).
@@ -1258,8 +1818,9 @@ model_deployments:
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/GPT-JT-6B-v1
 
   - name: together/gpt-neoxt-chat-base-20b
     model_name: together/gpt-neoxt-chat-base-20b
@@ -1267,8 +1828,9 @@ model_deployments:
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/GPT-NeoXT-Chat-Base-20B
 
   - name: together/redpajama-incite-base-3b-v1
     model_name: together/redpajama-incite-base-3b-v1
@@ -1276,8 +1838,9 @@ model_deployments:
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/RedPajama-INCITE-Base-3B-v1
 
   - name: together/redpajama-incite-instruct-3b-v1
     model_name: together/redpajama-incite-instruct-3b-v1
@@ -1285,8 +1848,9 @@ model_deployments:
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/RedPajama-INCITE-Instruct-3B-v1
 
   - name: together/redpajama-incite-base-7b
     model_name: together/redpajama-incite-base-7b
@@ -1294,8 +1858,9 @@ model_deployments:
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/RedPajama-INCITE-7B-Base
 
   - name: together/redpajama-incite-instruct-7b
     model_name: together/redpajama-incite-instruct-7b
@@ -1303,38 +1868,42 @@ model_deployments:
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/RedPajama-INCITE-7B-Instruct
 
   ## Tsinghua
   - name: together/glm
-    deprecated: true # Not available on Together yet
+    deprecated: true # Removed from Together
     model_name: tsinghua/glm
     tokenizer_name: TsinghuaKEG/ice
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
     window_service_spec:
       class_name: "helm.benchmark.window_services.ice_window_service.ICEWindowService"
-      args: {}
+
+  - name: thudm/cogview2
+    model_name: thudm/cogview2
+    tokenizer_name: openai/clip-vit-large-patch14
+    max_sequence_length: 75
+    client_spec:
+      class_name: "helm.clients.image_generation.cogview2_client.CogView2Client"
+    window_service_spec:
+      class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
 
   ## Yandex
   - name: together/yalm
-    deprecated: true # Not available on Together yet
+    deprecated: true # Removed from Together
     model_name: yandex/yalm
     tokenizer_name: Yandex/yalm
     max_sequence_length: 2048
     max_request_length: 2049
     client_spec:
-      class_name: "helm.proxy.clients.together_client.TogetherClient"
-      args: {}
+      class_name: "helm.clients.together_client.TogetherClient"
     window_service_spec:
       class_name: "helm.benchmark.window_services.yalm_window_service.YaLMWindowService"
-      args: {}
-
-
 
   # Writer
   - name: writer/palmyra-base
@@ -1343,8 +1912,7 @@ model_deployments:
     max_sequence_length: 2048
     max_sequence_and_generated_tokens_length: 2048
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-large
     model_name: writer/palmyra-large
@@ -1352,8 +1920,7 @@ model_deployments:
     max_sequence_length: 2048
     max_sequence_and_generated_tokens_length: 2048
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-instruct-30
     model_name: writer/palmyra-instruct-30
@@ -1361,8 +1928,7 @@ model_deployments:
     max_sequence_length: 2048
     max_sequence_and_generated_tokens_length: 2048
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-e
     model_name: writer/palmyra-e
@@ -1370,8 +1936,7 @@ model_deployments:
     max_sequence_length: 2048
     max_sequence_and_generated_tokens_length: 2048
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/silk-road
     model_name: writer/silk-road
@@ -1379,8 +1944,7 @@ model_deployments:
     max_sequence_length: 8192
     max_sequence_and_generated_tokens_length: 8192
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-x
     model_name: writer/palmyra-x
@@ -1388,8 +1952,7 @@ model_deployments:
     max_sequence_length: 8192
     max_sequence_and_generated_tokens_length: 8192
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-x-v2
     model_name: writer/palmyra-x-v2
@@ -1397,8 +1960,7 @@ model_deployments:
     max_sequence_length: 6000
     max_sequence_and_generated_tokens_length: 7024
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-x-v3
     model_name: writer/palmyra-x-v3
@@ -1406,8 +1968,7 @@ model_deployments:
     max_sequence_length: 6000
     max_sequence_and_generated_tokens_length: 7024
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
 
   - name: writer/palmyra-x-32k
     model_name: writer/palmyra-x-32k
@@ -1415,5 +1976,65 @@ model_deployments:
     max_sequence_length: 28000
     max_sequence_and_generated_tokens_length: 30048
     client_spec:
-      class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
-      args: {}
+      class_name: "helm.clients.palmyra_client.PalmyraClient"
+
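The Writer entries above use two distinct ceilings: max_sequence_length bounds the prompt alone, while max_sequence_and_generated_tokens_length bounds prompt plus completion (e.g. palmyra-x-v2 allows a 6000-token prompt but 7024 tokens in total). A sketch of the combined check, with the defaults taken from that entry:

    def request_ok(prompt_tokens: int, max_new_tokens: int,
                   max_seq: int = 6000, max_seq_plus_gen: int = 7024) -> bool:
        """A request must satisfy both the prompt ceiling and the prompt+completion ceiling."""
        return prompt_tokens <= max_seq and prompt_tokens + max_new_tokens <= max_seq_plus_gen

    assert request_ok(6000, 1024)       # exactly at both limits
    assert not request_ok(6000, 1025)   # completion pushes past the combined ceiling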
+  # Qwen
+
+  - name: together/qwen-7b
+    model_name: qwen/qwen-7b
+    tokenizer_name: qwen/qwen-7b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: togethercomputer/Qwen-7B
+
+  - name: together/qwen1.5-7b
+    model_name: qwen/qwen1.5-7b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: Qwen/Qwen1.5-7B
+
+  - name: together/qwen1.5-14b
+    model_name: qwen/qwen1.5-14b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: Qwen/Qwen1.5-14B
+
+  - name: together/qwen1.5-32b
+    model_name: qwen/qwen1.5-32b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 32767
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: Qwen/Qwen1.5-32B
+
+  - name: together/qwen1.5-72b
+    model_name: qwen/qwen1.5-72b
+    tokenizer_name: qwen/qwen1.5-7b
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: Qwen/Qwen1.5-72B
+
+  - name: huggingface/qwen-vl
+    model_name: qwen/qwen-vl
+    tokenizer_name: qwen/qwen-vl
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"
+
+  - name: huggingface/qwen-vl-chat
+    model_name: qwen/qwen-vl-chat
+    tokenizer_name: qwen/qwen-vl-chat
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"