crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (482)
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +134 -31
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  11. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  12. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  13. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  14. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  15. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  16. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  17. helm/benchmark/adaptation/request_state.py +6 -1
  18. helm/benchmark/adaptation/scenario_state.py +6 -2
  19. helm/benchmark/annotation/annotator.py +43 -0
  20. helm/benchmark/annotation/annotator_factory.py +61 -0
  21. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  22. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  23. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  24. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  25. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  26. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  27. helm/benchmark/annotation_executor.py +124 -0
  28. helm/benchmark/augmentations/data_augmenter.py +0 -2
  29. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  30. helm/benchmark/augmentations/perturbation.py +8 -2
  31. helm/benchmark/augmentations/perturbation_description.py +1 -1
  32. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  33. helm/benchmark/augmentations/test_perturbation.py +11 -7
  34. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  35. helm/benchmark/config_registry.py +7 -1
  36. helm/benchmark/executor.py +46 -16
  37. helm/benchmark/huggingface_registration.py +20 -7
  38. helm/benchmark/metrics/basic_metrics.py +169 -664
  39. helm/benchmark/metrics/bbq_metrics.py +3 -4
  40. helm/benchmark/metrics/bias_metrics.py +6 -6
  41. helm/benchmark/metrics/classification_metrics.py +11 -8
  42. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  43. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  44. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  45. helm/benchmark/metrics/common_metric_specs.py +167 -0
  46. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  47. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  48. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  49. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  50. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  51. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  52. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  53. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  54. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  55. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  56. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  57. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  58. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  59. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  60. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  61. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  62. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  63. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  64. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  65. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  66. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  68. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  69. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  70. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  71. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  72. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  73. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  74. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  75. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  76. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  77. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  78. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  79. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  80. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  81. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  82. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  83. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  84. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  85. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  86. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  87. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  88. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  89. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  90. helm/benchmark/metrics/metric.py +93 -172
  91. helm/benchmark/metrics/metric_name.py +0 -1
  92. helm/benchmark/metrics/metric_service.py +16 -0
  93. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  94. helm/benchmark/metrics/ranking_metrics.py +2 -2
  95. helm/benchmark/metrics/reference_metric.py +148 -0
  96. helm/benchmark/metrics/summac/model_summac.py +0 -2
  97. helm/benchmark/metrics/summarization_metrics.py +2 -2
  98. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  99. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  100. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  101. helm/benchmark/metrics/test_metric.py +2 -2
  102. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  103. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  104. helm/benchmark/metrics/toxicity_utils.py +23 -0
  105. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  106. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  107. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  108. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  109. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  110. helm/benchmark/model_deployment_registry.py +74 -0
  111. helm/benchmark/model_metadata_registry.py +36 -0
  112. helm/benchmark/multi_gpu_runner.py +133 -0
  113. helm/benchmark/presentation/create_plots.py +8 -7
  114. helm/benchmark/presentation/run_display.py +26 -10
  115. helm/benchmark/presentation/schema.py +15 -40
  116. helm/benchmark/presentation/summarize.py +119 -79
  117. helm/benchmark/presentation/table.py +8 -8
  118. helm/benchmark/presentation/test_contamination.py +2 -2
  119. helm/benchmark/presentation/test_run_entry.py +1 -2
  120. helm/benchmark/presentation/test_summarize.py +3 -3
  121. helm/benchmark/run.py +54 -26
  122. helm/benchmark/run_expander.py +214 -16
  123. helm/benchmark/run_spec.py +93 -0
  124. helm/benchmark/run_spec_factory.py +162 -0
  125. helm/benchmark/run_specs/__init__.py +0 -0
  126. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  127. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  128. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  129. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  130. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  131. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  132. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  133. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  134. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  135. helm/benchmark/runner.py +51 -57
  136. helm/benchmark/runner_config_registry.py +21 -0
  137. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  138. helm/benchmark/scenarios/bold_scenario.py +2 -2
  139. helm/benchmark/scenarios/code_scenario.py +1 -0
  140. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  141. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  142. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  143. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  144. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  145. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  146. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  147. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  148. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  149. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  150. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  151. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  152. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  153. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  154. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  155. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  156. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  157. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  158. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  159. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  160. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  162. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  163. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  164. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  165. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  166. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  167. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  168. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  169. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  170. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  171. helm/benchmark/scenarios/math_scenario.py +19 -2
  172. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  173. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  174. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  175. helm/benchmark/scenarios/scenario.py +4 -0
  176. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  177. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  178. helm/benchmark/scenarios/test_scenario.py +6 -3
  179. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  180. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  181. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  182. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  183. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  184. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  185. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  186. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  187. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  188. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  189. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  190. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  191. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  192. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  193. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  194. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  198. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  199. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  200. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  201. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  202. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  203. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  204. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  205. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -2
  206. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  207. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  208. helm/benchmark/server.py +24 -1
  209. helm/benchmark/slurm_runner.py +70 -49
  210. helm/benchmark/static/benchmarking.js +1 -1
  211. helm/benchmark/static/schema_classic.yaml +258 -1066
  212. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  213. helm/benchmark/static/schema_lite.yaml +2 -227
  214. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  215. helm/benchmark/static/schema_unitxt.yaml +428 -0
  216. helm/benchmark/static/schema_vlm.yaml +576 -0
  217. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  218. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  219. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  220. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  221. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  222. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  223. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  224. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  225. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  226. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  227. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  228. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  229. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  230. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  231. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  232. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  233. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  234. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  235. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  236. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  237. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  238. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  239. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  240. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  241. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  242. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  243. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  244. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  245. helm/benchmark/static_build/config.js +4 -0
  246. helm/benchmark/static_build/index.html +20 -0
  247. helm/benchmark/test_data_preprocessor.py +3 -3
  248. helm/benchmark/test_model_deployment_definition.py +14 -16
  249. helm/benchmark/test_run_expander.py +1 -1
  250. helm/benchmark/window_services/ai21_window_service.py +22 -33
  251. helm/benchmark/window_services/cohere_window_service.py +1 -63
  252. helm/benchmark/window_services/default_window_service.py +2 -44
  253. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  254. helm/benchmark/window_services/ice_window_service.py +0 -34
  255. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  256. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  257. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  258. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  259. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  260. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  261. helm/benchmark/window_services/local_window_service.py +21 -4
  262. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  263. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  264. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  265. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  266. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  267. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  268. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  269. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  270. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  271. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  272. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  273. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  274. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  275. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  276. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  277. helm/benchmark/window_services/test_utils.py +3 -2
  278. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  279. helm/benchmark/window_services/window_service.py +42 -0
  280. helm/benchmark/window_services/window_service_factory.py +4 -1
  281. helm/benchmark/window_services/yalm_window_service.py +0 -27
  282. helm/clients/__init__.py +0 -0
  283. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  284. helm/clients/aleph_alpha_client.py +112 -0
  285. helm/{proxy/clients → clients}/anthropic_client.py +203 -18
  286. helm/{proxy/clients → clients}/auto_client.py +59 -31
  287. helm/clients/bedrock_client.py +128 -0
  288. helm/clients/bedrock_utils.py +72 -0
  289. helm/{proxy/clients → clients}/client.py +65 -7
  290. helm/clients/clip_score_client.py +49 -0
  291. helm/clients/clip_scorers/__init__.py +0 -0
  292. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  293. helm/clients/clip_scorers/clip_scorer.py +50 -0
  294. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  295. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  296. helm/clients/gcs_client.py +82 -0
  297. helm/{proxy/clients → clients}/google_client.py +5 -5
  298. helm/clients/google_translate_client.py +35 -0
  299. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  300. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  301. helm/clients/image_generation/__init__.py +0 -0
  302. helm/clients/image_generation/adobe_vision_client.py +78 -0
  303. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  304. helm/clients/image_generation/cogview2/__init__.py +0 -0
  305. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  306. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  307. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  308. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  309. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  310. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  311. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  312. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  313. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  314. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  315. helm/clients/image_generation/cogview2_client.py +191 -0
  316. helm/clients/image_generation/dalle2_client.py +192 -0
  317. helm/clients/image_generation/dalle3_client.py +108 -0
  318. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  319. helm/clients/image_generation/dalle_mini/data.py +442 -0
  320. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  321. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  322. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  323. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  324. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  325. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  326. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  327. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  328. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  329. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  330. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  331. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  332. helm/clients/image_generation/dalle_mini_client.py +190 -0
  333. helm/clients/image_generation/deep_floyd_client.py +78 -0
  334. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  335. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  336. helm/clients/image_generation/lexica_client.py +86 -0
  337. helm/clients/image_generation/mindalle/__init__.py +0 -0
  338. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  339. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  340. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  341. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  342. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  343. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  344. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  345. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  346. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  347. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  348. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  349. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  350. helm/clients/image_generation/mindalle_client.py +115 -0
  351. helm/clients/image_generation/nudity_check_client.py +64 -0
  352. helm/clients/image_generation/together_image_generation_client.py +111 -0
  353. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  354. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  355. helm/clients/mistral_client.py +134 -0
  356. helm/clients/moderation_api_client.py +109 -0
  357. helm/clients/open_lm_client.py +43 -0
  358. helm/clients/openai_client.py +302 -0
  359. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  360. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  361. helm/clients/simple_client.py +64 -0
  362. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  363. helm/clients/test_client.py +100 -0
  364. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  365. helm/clients/test_simple_client.py +19 -0
  366. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  367. helm/{proxy/clients → clients}/together_client.py +12 -72
  368. helm/clients/vertexai_client.py +391 -0
  369. helm/clients/vision_language/__init__.py +0 -0
  370. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  371. helm/{proxy/clients → clients}/vision_language/idefics_client.py +53 -48
  372. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  373. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  374. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  375. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  376. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  377. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  378. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  379. helm/clients/vision_language/open_flamingo_client.py +155 -0
  380. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  381. helm/clients/vllm_client.py +46 -0
  382. helm/common/cache.py +16 -4
  383. helm/common/cache_backend_config.py +47 -0
  384. helm/common/clip_score_request.py +41 -0
  385. helm/common/file_caches/__init__.py +0 -0
  386. helm/common/file_caches/file_cache.py +16 -0
  387. helm/common/file_caches/local_file_cache.py +61 -0
  388. helm/common/file_caches/test_local_file_cache.py +25 -0
  389. helm/common/file_upload_request.py +27 -0
  390. helm/common/general.py +1 -1
  391. helm/common/image_generation_parameters.py +25 -0
  392. helm/common/images_utils.py +24 -1
  393. helm/common/key_value_store.py +35 -4
  394. helm/common/media_object.py +13 -0
  395. helm/common/moderations_api_request.py +71 -0
  396. helm/common/mongo_key_value_store.py +3 -3
  397. helm/common/multimodal_request_utils.py +31 -0
  398. helm/common/nudity_check_request.py +29 -0
  399. helm/common/request.py +15 -17
  400. helm/common/test_general.py +6 -0
  401. helm/common/tokenization_request.py +1 -1
  402. helm/config/model_deployments.yaml +1069 -546
  403. helm/config/model_metadata.yaml +753 -31
  404. helm/config/tokenizer_configs.yaml +142 -43
  405. helm/proxy/accounts.py +31 -4
  406. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  407. helm/proxy/critique/model_critique_client.py +8 -6
  408. helm/proxy/example_queries.py +29 -17
  409. helm/proxy/server.py +70 -5
  410. helm/proxy/services/remote_service.py +31 -0
  411. helm/proxy/services/server_service.py +96 -16
  412. helm/proxy/services/service.py +30 -0
  413. helm/proxy/services/test_remote_service.py +4 -3
  414. helm/proxy/services/test_service.py +0 -12
  415. helm/proxy/test_accounts.py +32 -0
  416. helm/proxy/token_counters/auto_token_counter.py +37 -37
  417. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  418. helm/proxy/token_counters/token_counter.py +3 -5
  419. helm/tokenizers/__init__.py +0 -0
  420. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  421. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  422. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  423. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  424. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  425. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  426. helm/tokenizers/simple_tokenizer.py +33 -0
  427. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  428. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  429. helm/tokenizers/test_simple_tokenizer.py +33 -0
  430. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  431. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  432. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  433. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  434. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  435. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  436. helm/benchmark/run_specs.py +0 -2762
  437. helm/benchmark/test_model_properties.py +0 -1570
  438. helm/benchmark/vlm_run_specs.py +0 -97
  439. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  440. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  441. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  442. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  443. helm/benchmark/window_services/t511b_window_service.py +0 -30
  444. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  445. helm/benchmark/window_services/ul2_window_service.py +0 -30
  446. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  447. helm/common/cache_utils.py +0 -14
  448. helm/proxy/clients/aleph_alpha_client.py +0 -95
  449. helm/proxy/clients/goose_ai_client.py +0 -99
  450. helm/proxy/clients/microsoft_client.py +0 -180
  451. helm/proxy/clients/openai_client.py +0 -206
  452. helm/proxy/clients/simple_client.py +0 -60
  453. helm/proxy/clients/test_client.py +0 -49
  454. helm/proxy/clients/vertexai_client.py +0 -115
  455. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  456. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  457. helm/proxy/token_counters/free_token_counter.py +0 -12
  458. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  459. helm/proxy/token_counters/openai_token_counter.py +0 -22
  460. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  461. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  462. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  463. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  464. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  465. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  466. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  467. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  468. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  469. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  470. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  471. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  472. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  473. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  474. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  475. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  476. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  477. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  478. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  479. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  480. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  481. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  482. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -8,16 +8,23 @@
8
8
  # # This file defines all the model deployments that you do not want to be public.
9
9
  # model_deployments: [] # Leave empty to disable private model deployments
10
10
 
11
-
12
11
  model_deployments:
13
-
14
12
  - name: simple/model1
15
13
  model_name: simple/model1
16
- tokenizer_name: simple/model1
14
+ tokenizer_name: simple/tokenizer1
17
15
  max_sequence_length: 2048
18
16
  client_spec:
19
- class_name: "helm.proxy.clients.simple_client.SimpleClient"
20
- args: {}
17
+ class_name: "helm.clients.simple_client.SimpleClient"
18
+
19
+ # Adobe
20
+ - name: adobe/giga-gan
21
+ model_name: adobe/giga-gan
22
+ tokenizer_name: openai/clip-vit-large-patch14
23
+ max_sequence_length: 75
24
+ client_spec:
25
+ class_name: "helm.clients.image_generation.adobe_vision_client.AdobeVisionClient"
26
+ window_service_spec:
27
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
21
28
 
22
29
  # AI21 Labs
23
30
 
@@ -29,14 +36,9 @@ model_deployments:
29
36
  tokenizer_name: ai21/j1
30
37
  max_sequence_length: 2047
31
38
  client_spec:
32
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
33
- args: {}
39
+ class_name: "helm.clients.ai21_client.AI21Client"
34
40
  window_service_spec:
35
41
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
36
- args:
37
- gpt2_window_service:
38
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
39
- args: {}
40
42
 
41
43
  - name: ai21/j1-large
42
44
  deprecated: true
@@ -44,14 +46,9 @@ model_deployments:
44
46
  tokenizer_name: ai21/j1
45
47
  max_sequence_length: 2047
46
48
  client_spec:
47
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
48
- args: {}
49
+ class_name: "helm.clients.ai21_client.AI21Client"
49
50
  window_service_spec:
50
51
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
51
- args:
52
- gpt2_window_service:
53
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
54
- args: {}
55
52
 
56
53
  - name: ai21/j1-grande
57
54
  deprecated: true
@@ -59,14 +56,9 @@ model_deployments:
59
56
  tokenizer_name: ai21/j1
60
57
  max_sequence_length: 2047
61
58
  client_spec:
62
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
63
- args: {}
59
+ class_name: "helm.clients.ai21_client.AI21Client"
64
60
  window_service_spec:
65
61
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
66
- args:
67
- gpt2_window_service:
68
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
69
- args: {}
70
62
 
71
63
  - name: ai21/j1-grande-v2-beta
72
64
  deprecated: true
@@ -74,58 +66,36 @@ model_deployments:
74
66
  tokenizer_name: ai21/j1
75
67
  max_sequence_length: 2047
76
68
  client_spec:
77
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
78
- args: {}
69
+ class_name: "helm.clients.ai21_client.AI21Client"
79
70
  window_service_spec:
80
71
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
81
- args:
82
- gpt2_window_service:
83
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
84
- args: {}
85
72
 
86
73
  - name: ai21/j2-jumbo
87
74
  model_name: ai21/j2-jumbo
88
75
  tokenizer_name: ai21/j1
89
76
  max_sequence_length: 6000
90
77
  client_spec:
91
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
92
- args: {}
78
+ class_name: "helm.clients.ai21_client.AI21Client"
93
79
  window_service_spec:
94
- class_name: "helm.benchmark.window_services.wider_ai21_window_service.AI21Jurassic2JumboWindowService"
95
- args:
96
- gpt2_window_service:
97
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
98
- args: {}
80
+ class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
99
81
 
100
82
  - name: ai21/j2-large
101
83
  model_name: ai21/j2-large
102
84
  tokenizer_name: ai21/j1
103
85
  max_sequence_length: 2047
104
86
  client_spec:
105
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
106
- args: {}
87
+ class_name: "helm.clients.ai21_client.AI21Client"
107
88
  window_service_spec:
108
89
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
109
- args:
110
- gpt2_window_service:
111
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
112
- args: {}
113
90
 
114
91
  - name: ai21/j2-grande
115
92
  model_name: ai21/j2-grande
116
93
  tokenizer_name: ai21/j1
117
94
  max_sequence_length: 2047
118
95
  client_spec:
119
- class_name: "helm.proxy.clients.ai21_client.AI21Client"
120
- args: {}
96
+ class_name: "helm.clients.ai21_client.AI21Client"
121
97
  window_service_spec:
122
98
  class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
123
- args:
124
- gpt2_window_service:
125
- class_name: "helm.benchmark.window_services.gpt2_window_service.GPT2WindowService"
126
- args: {}
127
-
128
-
129
99
 
130
100
  # Aleph Alpha
131
101
  - name: AlephAlpha/luminous-base
@@ -133,29 +103,57 @@ model_deployments:
133
103
  tokenizer_name: AlephAlpha/luminous-base
134
104
  max_sequence_length: 2048
135
105
  client_spec:
136
- class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient"
137
- args: {}
106
+ class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
138
107
 
139
108
  - name: AlephAlpha/luminous-extended
140
109
  model_name: AlephAlpha/luminous-extended
141
110
  tokenizer_name: AlephAlpha/luminous-extended
142
111
  max_sequence_length: 2048
143
112
  client_spec:
144
- class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient"
145
- args: {}
113
+ class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
146
114
 
147
115
  - name: AlephAlpha/luminous-supreme
148
116
  model_name: AlephAlpha/luminous-supreme
149
117
  tokenizer_name: AlephAlpha/luminous-supreme
150
118
  max_sequence_length: 2048
151
119
  client_spec:
152
- class_name: "helm.proxy.clients.aleph_alpha_client.AlephAlphaClient"
153
- args: {}
120
+ class_name: "helm.clients.aleph_alpha_client.AlephAlphaClient"
154
121
 
155
- # TODO: Add luminous-world once it is released.
122
+ # TODO: Add luminous-world once it is released
123
+
124
+ - name: AlephAlpha/m-vader
125
+ model_name: AlephAlpha/m-vader
126
+ tokenizer_name: openai/clip-vit-large-patch14
127
+ max_sequence_length: 75
128
+ client_spec:
129
+ class_name: "helm.clients.image_generation.aleph_alpha_image_generation_client.AlephAlphaImageGenerationClient"
130
+ window_service_spec:
131
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
132
+
133
+
134
+ # Amazon
135
+ - name: amazon/titan-text-lite-v1
136
+ model_name: amazon/titan-text-lite-v1
137
+ tokenizer_name: huggingface/gpt2
138
+ max_sequence_length: 4000
139
+ client_spec:
140
+ class_name: "helm.clients.bedrock_client.BedrockTitanClient"
141
+
142
+ - name: amazon/titan-tg1-large
143
+ model_name: amazon/titan-tg1-large
144
+ tokenizer_name: huggingface/gpt2
145
+ max_sequence_length: 8000
146
+ client_spec:
147
+ class_name: "helm.clients.bedrock_client.BedrockTitanClient"
148
+
149
+ - name: amazon/titan-text-express-v1
150
+ model_name: amazon/titan-text-express-v1
151
+ tokenizer_name: huggingface/gpt2
152
+ max_sequence_length: 8000
153
+ client_spec:
154
+ class_name: "helm.clients.bedrock_client.BedrockTitanClient"
156
155
 
157
156
 
158
-
159
157
  # Anthropic
160
158
  - name: anthropic/claude-v1.3
161
159
  model_name: anthropic/claude-v1.3
@@ -163,8 +161,7 @@ model_deployments:
163
161
  max_sequence_length: 8000
164
162
  max_sequence_and_generated_tokens_length: 9016
165
163
  client_spec:
166
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
167
- args: {}
164
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
168
165
 
169
166
  - name: anthropic/claude-instant-v1
170
167
  model_name: anthropic/claude-instant-v1
@@ -172,8 +169,7 @@ model_deployments:
172
169
  max_sequence_length: 8000
173
170
  max_sequence_and_generated_tokens_length: 9016
174
171
  client_spec:
175
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
176
- args: {}
172
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
177
173
 
178
174
  - name: anthropic/claude-instant-1.2
179
175
  model_name: anthropic/claude-instant-1.2
@@ -181,8 +177,7 @@ model_deployments:
181
177
  max_sequence_length: 8000
182
178
  max_sequence_and_generated_tokens_length: 9016
183
179
  client_spec:
184
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
185
- args: {}
180
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
186
181
 
187
182
  - name: anthropic/claude-2.0
188
183
  model_name: anthropic/claude-2.0
@@ -190,8 +185,7 @@ model_deployments:
190
185
  max_sequence_length: 8000
191
186
  max_sequence_and_generated_tokens_length: 9016
192
187
  client_spec:
193
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
194
- args: {}
188
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
195
189
 
196
190
  - name: anthropic/claude-2.1
197
191
  model_name: anthropic/claude-2.1
@@ -199,8 +193,28 @@ model_deployments:
199
193
  max_sequence_length: 8000
200
194
  max_sequence_and_generated_tokens_length: 9016
201
195
  client_spec:
202
- class_name: "helm.proxy.clients.anthropic_client.AnthropicClient"
203
- args: {}
196
+ class_name: "helm.clients.anthropic_client.AnthropicClient"
197
+
198
+ - name: anthropic/claude-3-sonnet-20240229
199
+ model_name: anthropic/claude-3-sonnet-20240229
200
+ tokenizer_name: anthropic/claude
201
+ max_sequence_length: 200000
202
+ client_spec:
203
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
204
+
205
+ - name: anthropic/claude-3-haiku-20240307
206
+ model_name: anthropic/claude-3-haiku-20240307
207
+ tokenizer_name: anthropic/claude
208
+ max_sequence_length: 200000
209
+ client_spec:
210
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
211
+
212
+ - name: anthropic/claude-3-opus-20240229
213
+ model_name: anthropic/claude-3-opus-20240229
214
+ tokenizer_name: anthropic/claude
215
+ max_sequence_length: 200000
216
+ client_spec:
217
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
204
218
 
205
219
  - name: anthropic/stanford-online-all-v4-s3
206
220
  deprecated: true # Closed model, not accessible via API
@@ -208,8 +222,7 @@ model_deployments:
208
222
  tokenizer_name: huggingface/gpt2
209
223
  max_sequence_length: 8192
210
224
  client_spec:
211
- class_name: "helm.proxy.clients.anthropic_client.AnthropicLegacyClient"
212
- args: {}
225
+ class_name: "helm.clients.anthropic_client.AnthropicLegacyClient"
213
226
 
214
227
  # Cohere
215
228
  - name: cohere/xlarge-20220609
@@ -218,11 +231,9 @@ model_deployments:
218
231
  max_sequence_length: 2047
219
232
  max_request_length: 2048
220
233
  client_spec:
221
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
222
- args: {}
234
+ class_name: "helm.clients.cohere_client.CohereClient"
223
235
  window_service_spec:
224
236
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
225
- args: {}
226
237
 
227
238
  - name: cohere/large-20220720
228
239
  model_name: cohere/large-20220720
@@ -230,11 +241,9 @@ model_deployments:
230
241
  max_sequence_length: 2047
231
242
  max_request_length: 2048
232
243
  client_spec:
233
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
234
- args: {}
244
+ class_name: "helm.clients.cohere_client.CohereClient"
235
245
  window_service_spec:
236
246
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
237
- args: {}
238
247
 
239
248
  - name: cohere/medium-20220720
240
249
  model_name: cohere/medium-20220720
@@ -242,11 +251,9 @@ model_deployments:
242
251
  max_sequence_length: 2047
243
252
  max_request_length: 2048
244
253
  client_spec:
245
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
246
- args: {}
254
+ class_name: "helm.clients.cohere_client.CohereClient"
247
255
  window_service_spec:
248
256
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
249
- args: {}
250
257
 
251
258
  - name: cohere/small-20220720
252
259
  model_name: cohere/small-20220720
@@ -254,11 +261,9 @@ model_deployments:
254
261
  max_sequence_length: 2047
255
262
  max_request_length: 2048
256
263
  client_spec:
257
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
258
- args: {}
264
+ class_name: "helm.clients.cohere_client.CohereClient"
259
265
  window_service_spec:
260
266
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
261
- args: {}
262
267
 
263
268
  - name: cohere/xlarge-20221108
264
269
  model_name: cohere/xlarge-20221108
@@ -266,11 +271,9 @@ model_deployments:
266
271
  max_sequence_length: 2047
267
272
  max_request_length: 2048
268
273
  client_spec:
269
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
270
- args: {}
274
+ class_name: "helm.clients.cohere_client.CohereClient"
271
275
  window_service_spec:
272
276
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
273
- args: {}
274
277
 
275
278
  - name: cohere/medium-20221108
276
279
  model_name: cohere/medium-20221108
@@ -278,11 +281,9 @@ model_deployments:
278
281
  max_sequence_length: 2047
279
282
  max_request_length: 2048
280
283
  client_spec:
281
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
282
- args: {}
284
+ class_name: "helm.clients.cohere_client.CohereClient"
283
285
  window_service_spec:
284
286
  class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
285
- args: {}
286
287
 
287
288
  - name: cohere/command-medium-beta
288
289
  model_name: cohere/command-medium-beta
@@ -290,11 +291,9 @@ model_deployments:
290
291
  max_sequence_length: 2019
291
292
  max_request_length: 2020
292
293
  client_spec:
293
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
294
- args: {}
294
+ class_name: "helm.clients.cohere_client.CohereClient"
295
295
  window_service_spec:
296
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
297
- args: {}
296
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
298
297
 
299
298
  - name: cohere/command-xlarge-beta
300
299
  model_name: cohere/command-xlarge-beta
@@ -302,11 +301,9 @@ model_deployments:
302
301
  max_sequence_length: 2019
303
302
  max_request_length: 2020
304
303
  client_spec:
305
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
306
- args: {}
304
+ class_name: "helm.clients.cohere_client.CohereClient"
307
305
  window_service_spec:
308
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
309
- args: {}
306
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
310
307
 
311
308
  - name: cohere/command
312
309
  model_name: cohere/command
@@ -314,11 +311,9 @@ model_deployments:
314
311
  max_sequence_length: 2019 # TODO: verify this
315
312
  max_request_length: 2020 # TODO: verify this
316
313
  client_spec:
317
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
318
- args: {}
314
+ class_name: "helm.clients.cohere_client.CohereClient"
319
315
  window_service_spec:
320
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
321
- args: {}
316
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
322
317
 
323
318
  - name: cohere/command-light
324
319
  model_name: cohere/command-light
@@ -326,38 +321,171 @@ model_deployments:
326
321
  max_sequence_length: 2019 # TODO: verify this
327
322
  max_request_length: 2020 # TODO: verify this
328
323
  client_spec:
329
- class_name: "helm.proxy.clients.cohere_client.CohereClient"
330
- args: {}
324
+ class_name: "helm.clients.cohere_client.CohereClient"
325
+ window_service_spec:
326
+ class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
327
+
328
+ # Craiyon
329
+
330
+ - name: craiyon/dalle-mini
331
+ model_name: craiyon/dalle-mini
332
+ tokenizer_name: openai/clip-vit-large-patch14
333
+ max_sequence_length: 75
334
+ client_spec:
335
+ class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
336
+ window_service_spec:
337
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
338
+
339
+ - name: craiyon/dalle-mega
340
+ model_name: craiyon/dalle-mega
341
+ tokenizer_name: openai/clip-vit-large-patch14
342
+ max_sequence_length: 75
343
+ client_spec:
344
+ class_name: "helm.clients.image_generation.dalle_mini_client.DALLEMiniClient"
331
345
  window_service_spec:
332
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereCommandWindowService"
333
- args: {}
346
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
347
+
348
+ # Databricks
349
+
350
+ - name: together/dbrx-instruct
351
+ model_name: databricks/dbrx-instruct
352
+ tokenizer_name: databricks/dbrx-instruct
353
+ max_sequence_length: 32767
354
+ client_spec:
355
+ class_name: "helm.clients.together_client.TogetherClient"
334
356
 
357
+ # DeepFloyd
335
358
 
359
+ - name: DeepFloyd/IF-I-M-v1.0
360
+ model_name: DeepFloyd/IF-I-M-v1.0
361
+ tokenizer_name: openai/clip-vit-large-patch14
362
+ max_sequence_length: 75
363
+ client_spec:
364
+ class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
365
+ window_service_spec:
366
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
367
+
368
+ - name: DeepFloyd/IF-I-L-v1.0
369
+ model_name: DeepFloyd/IF-I-L-v1.0
370
+ tokenizer_name: openai/clip-vit-large-patch14
371
+ max_sequence_length: 75
372
+ client_spec:
373
+ class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
374
+ window_service_spec:
375
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
376
+
377
+ - name: DeepFloyd/IF-I-XL-v1.0
378
+ model_name: DeepFloyd/IF-I-XL-v1.0
379
+ tokenizer_name: openai/clip-vit-large-patch14
380
+ max_sequence_length: 75
381
+ client_spec:
382
+ class_name: "helm.clients.image_generation.deep_floyd_client.DeepFloydClient"
383
+ window_service_spec:
384
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
385
+
386
+ # Deepseek
387
+
388
+ - name: together/deepseek-llm-67b-chat
389
+ model_name: deepseek-ai/deepseek-llm-67b-chat
390
+ tokenizer_name: deepseek-ai/deepseek-llm-67b-chat
391
+ max_sequence_length: 4095
392
+ client_spec:
393
+ class_name: "helm.clients.together_client.TogetherClient"
336
394
 
337
395
  # Gooseai
338
396
 
397
+ # TODO: Migrate these models to use OpenAIClient
398
+
339
399
  ## EleutherAI
340
- - name: gooseai/gpt-neo-20b
341
- model_name: eleutherai/gpt-neox-20b
342
- tokenizer_name: EleutherAI/gpt-neox-20b
343
- max_sequence_length: 2048
344
- max_request_length: 2049
400
+ # - name: gooseai/gpt-neo-20b
401
+ # model_name: eleutherai/gpt-neox-20b
402
+ # tokenizer_name: EleutherAI/gpt-neox-20b
403
+ # max_sequence_length: 2048
404
+ # max_request_length: 2049
405
+ # client_spec:
406
+ # class_name: "helm.clients.goose_ai_client.GooseAIClient"
407
+
408
+ # - name: gooseai/gpt-j-6b
409
+ # model_name: eleutherai/gpt-j-6b
410
+ # tokenizer_name: EleutherAI/gpt-j-6B
411
+ # max_sequence_length: 2048
412
+ # max_request_length: 2049
413
+ # client_spec:
414
+ # class_name: "helm.clients.goose_ai_client.GooseAIClient"
415
+
416
+ # Google
417
+ # See: https://cloud.google.com/vertex-ai/docs/generative-ai/learn/model-versioning
418
+
419
+ ## Gemini
420
+ # See: https://ai.google.dev/models/gemini#model_variations
421
+ - name: google/gemini-pro
422
+ model_name: google/gemini-pro
423
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
424
+ max_sequence_length: 30720
425
+ max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
345
426
  client_spec:
346
- class_name: "helm.proxy.clients.goose_ai_client.GooseAIClient"
347
- args: {}
427
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
348
428
 
349
- - name: gooseai/gpt-j-6b
350
- model_name: eleutherai/gpt-j-6b
351
- tokenizer_name: EleutherAI/gpt-j-6B
352
- max_sequence_length: 2048
353
- max_request_length: 2049
429
+ - name: google/gemini-1.0-pro-001
430
+ model_name: google/gemini-1.0-pro-001
431
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
432
+ max_sequence_length: 30720
433
+ max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
354
434
  client_spec:
355
- class_name: "helm.proxy.clients.goose_ai_client.GooseAIClient"
356
- args: {}
435
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
357
436
 
437
+ - name: google/gemini-pro-vision
438
+ model_name: google/gemini-pro-vision
439
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
440
+ max_sequence_length: 12288
441
+ max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models
442
+ client_spec:
443
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
358
444
 
445
+ - name: google/gemini-1.0-pro-vision-001
446
+ model_name: google/gemini-1.0-pro-vision-001
447
+ tokenizer_name: hf-internal-testing/llama-tokenizer
448
+ max_sequence_length: 12288
449
+ max_sequence_and_generated_tokens_length: 16384
450
+ client_spec:
451
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
359
452
 
360
- # Google
453
+ - name: google/gemini-1.5-pro-preview-0409
454
+ model_name: google/gemini-1.5-pro-preview-0409
455
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
456
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
457
+ # TODO: Max output tokens: 8192
458
+ client_spec:
459
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
460
+
461
+ ## Gemma
462
+ - name: together/gemma-2b
463
+ model_name: google/gemma-2b
464
+ tokenizer_name: google/gemma-2b
465
+ max_sequence_length: 7167
466
+ client_spec:
467
+ class_name: "helm.clients.together_client.TogetherClient"
468
+
469
+ - name: together/gemma-2b-it
470
+ model_name: google/gemma-2b-it
471
+ tokenizer_name: google/gemma-2b
472
+ max_sequence_length: 7167
473
+ client_spec:
474
+ class_name: "helm.clients.together_client.TogetherClient"
475
+
476
+ - name: together/gemma-7b
477
+ model_name: google/gemma-7b
478
+ tokenizer_name: google/gemma-2b
479
+ max_sequence_length: 7167
480
+ client_spec:
481
+ class_name: "helm.clients.together_client.TogetherClient"
482
+
483
+ - name: together/gemma-7b-it
484
+ model_name: google/gemma-7b-it
485
+ tokenizer_name: google/gemma-2b
486
+ max_sequence_length: 7167
487
+ client_spec:
488
+ class_name: "helm.clients.together_client.TogetherClient"
361
489
 
362
490
  ## PaLM 2
363
491
  - name: google/text-bison@001
@@ -366,20 +494,30 @@ model_deployments:
366
494
  max_sequence_length: 6000 # Officially 8192
367
495
  max_sequence_and_generated_tokens_length: 7000 # Officially 9216
368
496
  client_spec:
369
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
370
- args: {}
497
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
371
498
  window_service_spec:
372
499
  class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
373
500
 
501
+ - name: google/text-bison@002
502
+ model_name: google/text-bison@002
503
+ tokenizer_name: google/text-bison@002
504
+ max_sequence_length: 6000 # Officially 8192
505
+ max_sequence_and_generated_tokens_length: 9216
506
+ client_spec:
507
+ class_name: "helm.proxy.clients.vertexai_client.VertexAITextClient"
508
+ window_service_spec:
509
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
374
510
 
375
511
  - name: google/text-bison-32k
376
512
  model_name: google/text-bison-32k
377
- tokenizer_name: google/mt5-base
513
+ tokenizer_name: google/text-bison@001
378
514
  max_sequence_length: 32000
379
515
  max_sequence_and_generated_tokens_length: 32000
380
516
  client_spec:
381
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
382
- args: {}
517
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
518
+ window_service_spec:
519
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
520
+
383
521
 
384
522
  - name: google/text-unicorn@001
385
523
  model_name: google/text-unicorn@001
@@ -387,30 +525,39 @@ model_deployments:
387
525
  max_sequence_length: 6000 # Officially 8192
388
526
  max_sequence_and_generated_tokens_length: 7000 # Officially 9216
389
527
  client_spec:
390
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
391
- args: {}
528
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
392
529
  window_service_spec:
393
530
  class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
394
531
 
395
532
  - name: google/code-bison@001
396
533
  model_name: google/code-bison@001
397
- tokenizer_name: google/mt5-base
534
+ tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
398
535
  max_sequence_length: 6000 # Officially 6144
399
536
  max_sequence_and_generated_tokens_length: 7000 # Officially 7168
400
537
  client_spec:
401
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
402
- args: {}
538
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
539
+ window_service_spec:
540
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
541
+
542
+ - name: google/code-bison@002
543
+ model_name: google/code-bison@002
544
+ tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
545
+ max_sequence_length: 6000 # Officially 6144
546
+ max_sequence_and_generated_tokens_length: 7168
547
+ client_spec:
548
+ class_name: "helm.proxy.clients.vertexai_client.VertexAITextClient"
549
+ window_service_spec:
550
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
403
551
 
404
552
  - name: google/code-bison-32k
405
553
  model_name: google/code-bison-32k
406
- tokenizer_name: google/mt5-base
554
+ tokenizer_name: google/mt5-base # TODO #2188: change to actual tokenizer
407
555
  max_sequence_length: 32000
408
556
  max_sequence_and_generated_tokens_length: 32000
409
557
  client_spec:
410
- class_name: "helm.proxy.clients.vertexai_client.VertexAIClient"
411
- args: {}
412
-
413
-
558
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
559
+ window_service_spec:
560
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
414
561
 
415
562
  # HuggingFace
416
563
 
@@ -420,26 +567,210 @@ model_deployments:
420
567
  tokenizer_name: bigcode/santacoder
421
568
  max_sequence_length: 2048
422
569
  client_spec:
423
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
424
- args: {}
570
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
425
571
 
426
572
  - name: huggingface/starcoder
427
573
  model_name: bigcode/starcoder
428
574
  tokenizer_name: bigcode/starcoder
429
575
  max_sequence_length: 8192
430
576
  client_spec:
431
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
432
- args: {}
577
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
578
+
579
+ ## Databricks
580
+ - name: huggingface/dolly-v2-3b
581
+ model_name: databricks/dolly-v2-3b
582
+ tokenizer_name: EleutherAI/gpt-neox-20b
583
+ max_sequence_length: 2048
584
+ client_spec:
585
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
586
+
587
+ - name: huggingface/dolly-v2-7b
588
+ model_name: databricks/dolly-v2-7b
589
+ tokenizer_name: EleutherAI/gpt-neox-20b
590
+ max_sequence_length: 2048
591
+ client_spec:
592
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
593
+
594
+ - name: huggingface/dolly-v2-12b
595
+ model_name: databricks/dolly-v2-12b
596
+ tokenizer_name: EleutherAI/gpt-neox-20b
597
+ max_sequence_length: 2048
598
+ client_spec:
599
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
433
600
 
434
601
  ## EleutherAI
602
+ - name: huggingface/pythia-1b-v0
603
+ model_name: eleutherai/pythia-1b-v0
604
+ tokenizer_name: EleutherAI/gpt-neox-20b
605
+ max_sequence_length: 2048
606
+ client_spec:
607
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
608
+
609
+ - name: huggingface/pythia-2.8b-v0
610
+ model_name: eleutherai/pythia-2.8b-v0
611
+ tokenizer_name: EleutherAI/gpt-neox-20b
612
+ max_sequence_length: 2048
613
+ client_spec:
614
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
615
+
616
+ - name: huggingface/pythia-6.9b
617
+ model_name: eleutherai/pythia-6.9b
618
+ tokenizer_name: EleutherAI/gpt-neox-20b
619
+ max_sequence_length: 2048
620
+ client_spec:
621
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
622
+
623
+ - name: huggingface/pythia-12b-v0
624
+ model_name: eleutherai/pythia-12b-v0
625
+ tokenizer_name: EleutherAI/gpt-neox-20b
626
+ max_sequence_length: 2048
627
+ client_spec:
628
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
629
+
435
630
  - name: huggingface/gpt-j-6b
436
631
  model_name: eleutherai/gpt-j-6b
437
632
  tokenizer_name: EleutherAI/gpt-j-6B
438
633
  max_sequence_length: 2048
439
634
  max_request_length: 2049
440
635
  client_spec:
441
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
442
- args: {}
636
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
637
+
638
+ - name: huggingface/gpt-neox-20b
639
+ model_name: eleutherai/gpt-neox-20b
640
+ tokenizer_name: EleutherAI/gpt-neox-20b
641
+ max_sequence_length: 2048
642
+ max_request_length: 2049
643
+ client_spec:
644
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
645
+
646
+ ## LMSYS
647
+ - name: huggingface/vicuna-7b-v1.3
648
+ model_name: lmsys/vicuna-7b-v1.3
649
+ tokenizer_name: hf-internal-testing/llama-tokenizer
650
+ max_sequence_length: 2048
651
+ client_spec:
652
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
653
+
654
+ - name: huggingface/vicuna-13b-v1.3
655
+ model_name: lmsys/vicuna-13b-v1.3
656
+ tokenizer_name: hf-internal-testing/llama-tokenizer
657
+ max_sequence_length: 2048
658
+ client_spec:
659
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
660
+
661
+ ## Meta
662
+ - name: huggingface/opt-175b
663
+ model_name: meta/opt-175b
664
+ tokenizer_name: facebook/opt-66b
665
+ max_sequence_length: 2048
666
+ client_spec:
667
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
668
+ args:
669
+ pretrained_model_name_or_path: facebook/opt-175b
670
+
671
+ - name: huggingface/opt-66b
672
+ model_name: meta/opt-66b
673
+ tokenizer_name: facebook/opt-66b
674
+ max_sequence_length: 2048
675
+ client_spec:
676
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
677
+ args:
678
+ pretrained_model_name_or_path: facebook/opt-66b
679
+
680
+ - name: huggingface/opt-6.7b
681
+ model_name: meta/opt-6.7b
682
+ tokenizer_name: facebook/opt-66b
683
+ max_sequence_length: 2048
684
+ client_spec:
685
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
686
+ args:
687
+ pretrained_model_name_or_path: facebook/opt-6.7b
688
+
689
+ - name: huggingface/opt-1.3b
690
+ model_name: meta/opt-1.3b
691
+ tokenizer_name: facebook/opt-66b
692
+ max_sequence_length: 2048
693
+ client_spec:
694
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
695
+ args:
696
+ pretrained_model_name_or_path: facebook/opt-1.3b
697
+
698
+ ## Microsoft
699
+ - name: huggingface/llava-1.5-7b-hf
700
+ model_name: microsoft/llava-1.5-7b-hf
701
+ tokenizer_name: hf-internal-testing/llama-tokenizer
702
+ max_sequence_length: 2048
703
+ client_spec:
704
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
705
+
706
+ - name: huggingface/llava-1.5-13b-hf
707
+ model_name: microsoft/llava-1.5-13b-hf
708
+ tokenizer_name: hf-internal-testing/llama-tokenizer
709
+ max_sequence_length: 2048
710
+ client_spec:
711
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
712
+
713
+ ## OpenFlamingo
714
+ - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
715
+ model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
716
+ tokenizer_name: anas-awadalla/mpt-7b
717
+ max_sequence_length: 2048
718
+ client_spec:
719
+ class_name: "helm.clients.vision_language.open_flamingo_client.OpenFlamingoClient"
720
+ args:
721
+ checkpoint_path: "openflamingo/OpenFlamingo-9B-vitl-mpt7b"
722
+ tokenizer_name: "anas-awadalla-2/mpt-7b"
723
+ cross_attn_every_n_layers: 4
724
+
725
+ - name: together/phi-2
726
+ model_name: microsoft/phi-2
727
+ tokenizer_name: microsoft/phi-2
728
+ max_sequence_length: 2047
729
+ client_spec:
730
+ class_name: "helm.clients.together_client.TogetherClient"
731
+
732
+ ## Mistral AI
733
+ - name: huggingface/bakLlava-v1-hf
734
+ model_name: mistralai/bakLlava-v1-hf
735
+ tokenizer_name: hf-internal-testing/llama-tokenizer
736
+ max_sequence_length: 2048
737
+ client_spec:
738
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
739
+
740
+ ## MosaicML
741
+ - name: huggingface/mpt-7b
742
+ model_name: mosaicml/mpt-7b
743
+ tokenizer_name: EleutherAI/gpt-neox-20b
744
+ max_sequence_length: 2048
745
+ client_spec:
746
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
747
+ args:
748
+ pretrained_model_name_or_path: mosaicml/mpt-7b
749
+
750
+ - name: huggingface/mpt-instruct-7b
751
+ model_name: mosaicml/mpt-instruct-7b
752
+ tokenizer_name: EleutherAI/gpt-neox-20b
753
+ max_sequence_length: 2048
754
+ client_spec:
755
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
756
+ args:
757
+ pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
758
+
759
+ - name: huggingface/mpt-30b
760
+ model_name: mosaicml/mpt-30b
761
+ tokenizer_name: EleutherAI/gpt-neox-20b
762
+ max_sequence_length: 2048
763
+ client_spec:
764
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
765
+
766
+ - name: huggingface/mpt-instruct-30b
767
+ model_name: mosaicml/mpt-instruct-30b
768
+ tokenizer_name: EleutherAI/gpt-neox-20b
769
+ max_sequence_length: 2048
770
+ client_spec:
771
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
772
+ args:
773
+ pretrained_model_name_or_path: mosaicml/mpt-30b-instruct
443
774
 
444
775
  ## OpenAI
445
776
  - name: huggingface/gpt2
@@ -448,8 +779,188 @@ model_deployments:
448
779
  max_sequence_length: 1024
449
780
  max_request_length: 1025
450
781
  client_spec:
451
- class_name: "helm.proxy.clients.huggingface_client.HuggingFaceClient"
452
- args: {}
782
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
783
+ args:
784
+ pretrained_model_name_or_path: openai-community/gpt2
785
+
786
+ ## StabilityAI
787
+ - name: huggingface/stablelm-base-alpha-3b
788
+ model_name: stabilityai/stablelm-base-alpha-3b
789
+ tokenizer_name: EleutherAI/gpt-neox-20b
790
+ max_sequence_length: 4096
791
+ client_spec:
792
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
793
+
794
+ - name: huggingface/stablelm-base-alpha-7b
795
+ model_name: stabilityai/stablelm-base-alpha-7b
796
+ tokenizer_name: EleutherAI/gpt-neox-20b
797
+ max_sequence_length: 4096
798
+ client_spec:
799
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
800
+
801
+ ## Text-to-Image Diffusion Models
802
+
803
+ - name: huggingface/dreamlike-diffusion-v1-0
804
+ model_name: huggingface/dreamlike-diffusion-v1-0
805
+ tokenizer_name: openai/clip-vit-large-patch14
806
+ max_sequence_length: 75
807
+ client_spec:
808
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
809
+ window_service_spec:
810
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
811
+
812
+ - name: huggingface/dreamlike-photoreal-v2-0
813
+ model_name: huggingface/dreamlike-photoreal-v2-0
814
+ tokenizer_name: openai/clip-vit-large-patch14
815
+ max_sequence_length: 75
816
+ client_spec:
817
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
818
+ window_service_spec:
819
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
820
+
821
+ - name: huggingface/openjourney-v1-0
822
+ model_name: huggingface/openjourney-v1-0
823
+ tokenizer_name: openai/clip-vit-large-patch14
824
+ max_sequence_length: 75
825
+ client_spec:
826
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
827
+ window_service_spec:
828
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
829
+
830
+ - name: huggingface/openjourney-v2-0
831
+ model_name: huggingface/openjourney-v2-0
832
+ tokenizer_name: openai/clip-vit-large-patch14
833
+ max_sequence_length: 75
834
+ client_spec:
835
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
836
+ window_service_spec:
837
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
838
+
839
+ - name: huggingface/redshift-diffusion
840
+ model_name: huggingface/redshift-diffusion
841
+ tokenizer_name: openai/clip-vit-large-patch14
842
+ max_sequence_length: 75
843
+ client_spec:
844
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
845
+ window_service_spec:
846
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
847
+
848
+ - name: huggingface/promptist-stable-diffusion-v1-4
849
+ model_name: huggingface/promptist-stable-diffusion-v1-4
850
+ tokenizer_name: openai/clip-vit-large-patch14
851
+ max_sequence_length: 75
852
+ client_spec:
853
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
854
+ window_service_spec:
855
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
856
+
857
+ - name: huggingface/stable-diffusion-v1-4
858
+ model_name: huggingface/stable-diffusion-v1-4
859
+ tokenizer_name: openai/clip-vit-large-patch14
860
+ max_sequence_length: 75
861
+ client_spec:
862
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
863
+ window_service_spec:
864
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
865
+
866
+ - name: huggingface/stable-diffusion-v1-5
867
+ model_name: huggingface/stable-diffusion-v1-5
868
+ tokenizer_name: openai/clip-vit-large-patch14
869
+ max_sequence_length: 75
870
+ client_spec:
871
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
872
+ window_service_spec:
873
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
874
+
875
+ - name: huggingface/stable-diffusion-v2-base
876
+ model_name: huggingface/stable-diffusion-v2-base
877
+ tokenizer_name: openai/clip-vit-large-patch14
878
+ max_sequence_length: 75
879
+ client_spec:
880
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
881
+ window_service_spec:
882
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
883
+
884
+ - name: huggingface/stable-diffusion-v2-1-base
885
+ model_name: huggingface/stable-diffusion-v2-1-base
886
+ tokenizer_name: openai/clip-vit-large-patch14
887
+ max_sequence_length: 75
888
+ client_spec:
889
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
890
+ window_service_spec:
891
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
892
+
893
+ - name: huggingface/stable-diffusion-safe-weak
894
+ model_name: huggingface/stable-diffusion-safe-weak
895
+ tokenizer_name: openai/clip-vit-large-patch14
896
+ max_sequence_length: 75
897
+ client_spec:
898
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
899
+ window_service_spec:
900
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
901
+
902
+ - name: huggingface/stable-diffusion-safe-medium
903
+ model_name: huggingface/stable-diffusion-safe-medium
904
+ tokenizer_name: openai/clip-vit-large-patch14
905
+ max_sequence_length: 75
906
+ client_spec:
907
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
908
+ window_service_spec:
909
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
910
+
911
+ - name: huggingface/stable-diffusion-safe-strong
912
+ model_name: huggingface/stable-diffusion-safe-strong
913
+ tokenizer_name: openai/clip-vit-large-patch14
914
+ max_sequence_length: 75
915
+ client_spec:
916
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
917
+ window_service_spec:
918
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
919
+
920
+ - name: huggingface/stable-diffusion-safe-max
921
+ model_name: huggingface/stable-diffusion-safe-max
922
+ tokenizer_name: openai/clip-vit-large-patch14
923
+ max_sequence_length: 75
924
+ client_spec:
925
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
926
+ window_service_spec:
927
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
928
+
929
+ - name: huggingface/vintedois-diffusion-v0-1
930
+ model_name: huggingface/vintedois-diffusion-v0-1
931
+ tokenizer_name: openai/clip-vit-large-patch14
932
+ max_sequence_length: 75
933
+ client_spec:
934
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
935
+ window_service_spec:
936
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
937
+
938
+ - name: segmind/Segmind-Vega
939
+ model_name: segmind/Segmind-Vega
940
+ tokenizer_name: openai/clip-vit-large-patch14
941
+ max_sequence_length: 75
942
+ client_spec:
943
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
944
+ window_service_spec:
945
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
946
+
947
+ - name: segmind/SSD-1B
948
+ model_name: segmind/SSD-1B
949
+ tokenizer_name: openai/clip-vit-large-patch14
950
+ max_sequence_length: 75
951
+ client_spec:
952
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
953
+ window_service_spec:
954
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
955
+
956
+ - name: stabilityai/stable-diffusion-xl-base-1.0
957
+ model_name: stabilityai/stable-diffusion-xl-base-1.0
958
+ tokenizer_name: openai/clip-vit-large-patch14
959
+ max_sequence_length: 75
960
+ client_spec:
961
+ class_name: "helm.clients.image_generation.huggingface_diffusers_client.HuggingFaceDiffusersClient"
962
+ window_service_spec:
963
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
453
964
 
454
965
  # HuggingFaceM4
455
966
  - name: HuggingFaceM4/idefics-9b
@@ -457,34 +968,48 @@ model_deployments:
457
968
  tokenizer_name: HuggingFaceM4/idefics-9b
458
969
  max_sequence_length: 2048
459
970
  client_spec:
460
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
461
- args: {}
971
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
462
972
 
463
973
  - name: HuggingFaceM4/idefics-9b-instruct
464
974
  model_name: HuggingFaceM4/idefics-9b-instruct
465
975
  tokenizer_name: HuggingFaceM4/idefics-9b-instruct
466
976
  max_sequence_length: 2048
467
977
  client_spec:
468
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
469
- args: {}
978
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
470
979
 
471
980
  - name: HuggingFaceM4/idefics-80b
472
981
  model_name: HuggingFaceM4/idefics-80b
473
982
  tokenizer_name: HuggingFaceM4/idefics-80b
474
983
  max_sequence_length: 2048
475
984
  client_spec:
476
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
477
- args: {}
985
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
478
986
 
479
987
  - name: HuggingFaceM4/idefics-80b-instruct
480
988
  model_name: HuggingFaceM4/idefics-80b-instruct
481
989
  tokenizer_name: HuggingFaceM4/idefics-80b-instruct
482
990
  max_sequence_length: 2048
483
991
  client_spec:
484
- class_name: "helm.proxy.clients.vision_language.idefics_client.IDEFICSClient"
485
- args: {}
992
+ class_name: "helm.clients.vision_language.idefics_client.IDEFICSClient"
486
993
 
994
+ # Lexica
995
+ - name: lexica/search-stable-diffusion-1.5
996
+ model_name: lexica/search-stable-diffusion-1.5
997
+ tokenizer_name: openai/clip-vit-large-patch14
998
+ max_sequence_length: 200
999
+ client_spec:
1000
+ class_name: "helm.clients.image_generation.lexica_client.LexicaClient"
1001
+ window_service_spec:
1002
+ class_name: "helm.benchmark.window_services.image_generation.lexica_search_window_service.LexicaSearchWindowService"
487
1003
 
1004
+ # Kakao
1005
+ - name: kakaobrain/mindall-e
1006
+ model_name: kakaobrain/mindall-e
1007
+ tokenizer_name: openai/clip-vit-large-patch14
1008
+ max_sequence_length: 75
1009
+ client_spec:
1010
+ class_name: "helm.clients.image_generation.mindalle_client.MinDALLEClient"
1011
+ window_service_spec:
1012
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
488
1013
 
489
1014
  # Lightning AI
490
1015
  - name: lightningai/lit-gpt
@@ -492,33 +1017,41 @@ model_deployments:
492
1017
  tokenizer_name: lightningai/lit-gpt
493
1018
  max_sequence_length: 2048
494
1019
  client_spec:
495
- class_name: "helm.proxy.clients.lit_gpt_client.LitGPTClient"
1020
+ class_name: "helm.clients.lit_gpt_client.LitGPTClient"
496
1021
  args:
497
1022
  checkpoint_dir: "" # Path to the checkpoint directory
498
1023
  precision: bf16-true
499
1024
 
500
-
501
-
502
- # Microsoft
503
- - name: microsoft/TNLGv2_530B
504
- model_name: microsoft/TNLGv2_530B
505
- tokenizer_name: microsoft/gpt2
506
- max_sequence_length: 2047
507
- max_request_length: 2048
1025
+ # Mistral AI
1026
+ - name: mistralai/mistral-tiny
1027
+ model_name: mistralai/mistral-7b-v0.1
1028
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1029
+ max_sequence_length: 32000
508
1030
  client_spec:
509
- class_name: "helm.proxy.clients.microsoft_client.MicrosoftClient"
510
- args: {}
1031
+ class_name: "helm.clients.mistral_client.MistralAIClient"
1032
+ args:
1033
+ mistral_model: "mistral-tiny"
511
1034
 
512
- - name: microsoft/TNLGv2_7B
513
- model_name: microsoft/TNLGv2_7B
514
- tokenizer_name: microsoft/gpt2
515
- max_sequence_length: 2047
516
- max_request_length: 2048
1035
+ - name: mistralai/mistral-small-2402
1036
+ model_name: mistralai/mistral-small-2402
1037
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1038
+ max_sequence_length: 32000
517
1039
  client_spec:
518
- class_name: "helm.proxy.clients.microsoft_client.MicrosoftClient"
519
- args: {}
1040
+ class_name: "helm.clients.mistral_client.MistralAIClient"
520
1041
 
1042
+ - name: mistralai/mistral-medium-2312
1043
+ model_name: mistralai/mistral-medium-2312
1044
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1045
+ max_sequence_length: 32000
1046
+ client_spec:
1047
+ class_name: "helm.clients.mistral_client.MistralAIClient"
521
1048
 
1049
+ - name: mistralai/mistral-large-2402
1050
+ model_name: mistralai/mistral-large-2402
1051
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1052
+ max_sequence_length: 32000
1053
+ client_spec:
1054
+ class_name: "helm.clients.mistral_client.MistralAIClient"
522
1055
 
523
1056
  # NeurIPS
524
1057
  - name: neurips/local
@@ -526,10 +1059,7 @@ model_deployments:
526
1059
  tokenizer_name: neurips/local
527
1060
  max_sequence_length: 2048
528
1061
  client_spec:
529
- class_name: "helm.proxy.clients.http_model_client.HTTPModelClient"
530
- args: {}
531
-
532
-
1062
+ class_name: "helm.clients.http_model_client.HTTPModelClient"
533
1063
 
534
1064
  # Nvidia
535
1065
  - name: nvidia/megatron-gpt2
@@ -537,17 +1067,33 @@ model_deployments:
537
1067
  tokenizer_name: huggingface/gpt2
538
1068
  max_sequence_length: 1024
539
1069
  client_spec:
540
- class_name: "helm.proxy.clients.megatron_client.MegatronClient"
541
- args: {}
542
-
543
-
1070
+ class_name: "helm.clients.megatron_client.MegatronClient"
544
1071
 
545
1072
  # OpenAI
546
1073
 
547
1074
  ## GPT 3 Models
1075
+
1076
+ - name: openai/davinci-002
1077
+ model_name: openai/davinci-002
1078
+ tokenizer_name: openai/cl100k_base
1079
+ # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1080
+ # to provide a margin of error.
1081
+ max_sequence_length: 16000
1082
+ client_spec:
1083
+ class_name: "helm.clients.openai_client.OpenAIClient"
1084
+
1085
+ - name: openai/babbage-002
1086
+ model_name: openai/babbage-002
1087
+ tokenizer_name: openai/cl100k_base
1088
+ # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1089
+ # to provide a margin of error.
1090
+ max_sequence_length: 16000
1091
+ client_spec:
1092
+ class_name: "helm.clients.openai_client.OpenAIClient"
1093
+
548
1094
  # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
549
1095
  # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
550
-
1096
+
551
1097
  - name: openai/davinci
552
1098
  deprecated: true
553
1099
  model_name: openai/davinci
@@ -555,8 +1101,7 @@ model_deployments:
555
1101
  max_sequence_length: 2048
556
1102
  max_request_length: 2049
557
1103
  client_spec:
558
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
559
- args: {}
1104
+ class_name: "helm.clients.openai_client.OpenAIClient"
560
1105
 
561
1106
  - name: openai/curie
562
1107
  deprecated: true
@@ -565,8 +1110,7 @@ model_deployments:
565
1110
  max_sequence_length: 2048
566
1111
  max_request_length: 2049
567
1112
  client_spec:
568
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
569
- args: {}
1113
+ class_name: "helm.clients.openai_client.OpenAIClient"
570
1114
 
571
1115
  - name: openai/babbage
572
1116
  deprecated: true
@@ -575,8 +1119,7 @@ model_deployments:
575
1119
  max_sequence_length: 2048
576
1120
  max_request_length: 2049
577
1121
  client_spec:
578
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
579
- args: {}
1122
+ class_name: "helm.clients.openai_client.OpenAIClient"
580
1123
 
581
1124
  - name: openai/ada
582
1125
  deprecated: true
@@ -585,8 +1128,7 @@ model_deployments:
585
1128
  max_sequence_length: 2048
586
1129
  max_request_length: 2049
587
1130
  client_spec:
588
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
589
- args: {}
1131
+ class_name: "helm.clients.openai_client.OpenAIClient"
590
1132
 
591
1133
  - name: openai/text-davinci-003
592
1134
  deprecated: true
@@ -595,8 +1137,7 @@ model_deployments:
595
1137
  max_sequence_length: 4000
596
1138
  max_request_length: 4001
597
1139
  client_spec:
598
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
599
- args: {}
1140
+ class_name: "helm.clients.openai_client.OpenAIClient"
600
1141
 
601
1142
  - name: openai/text-davinci-002
602
1143
  deprecated: true
@@ -605,8 +1146,7 @@ model_deployments:
605
1146
  max_sequence_length: 4000
606
1147
  max_request_length: 4001
607
1148
  client_spec:
608
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
609
- args: {}
1149
+ class_name: "helm.clients.openai_client.OpenAIClient"
610
1150
 
611
1151
  - name: openai/text-davinci-001
612
1152
  deprecated: true
@@ -615,8 +1155,7 @@ model_deployments:
615
1155
  max_sequence_length: 2048
616
1156
  max_request_length: 2049
617
1157
  client_spec:
618
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
619
- args: {}
1158
+ class_name: "helm.clients.openai_client.OpenAIClient"
620
1159
 
621
1160
  - name: openai/text-curie-001
622
1161
  deprecated: true
@@ -625,8 +1164,7 @@ model_deployments:
625
1164
  max_sequence_length: 2048
626
1165
  max_request_length: 2049
627
1166
  client_spec:
628
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
629
- args: {}
1167
+ class_name: "helm.clients.openai_client.OpenAIClient"
630
1168
 
631
1169
  - name: openai/text-babbage-001
632
1170
  deprecated: true
@@ -635,8 +1173,7 @@ model_deployments:
635
1173
  max_sequence_length: 2048
636
1174
  max_request_length: 2049
637
1175
  client_spec:
638
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
639
- args: {}
1176
+ class_name: "helm.clients.openai_client.OpenAIClient"
640
1177
 
641
1178
  - name: openai/text-ada-001
642
1179
  deprecated: true
@@ -645,13 +1182,19 @@ model_deployments:
645
1182
  max_sequence_length: 2048
646
1183
  max_request_length: 2049
647
1184
  client_spec:
648
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
649
- args: {}
650
-
1185
+ class_name: "helm.clients.openai_client.OpenAIClient"
651
1186
 
652
1187
  ## GPT 3.5 Turbo Models
653
1188
  # ChatGPT: https://openai.com/blog/chatgpt
654
1189
 
1190
+ - name: openai/gpt-3.5-turbo-instruct
1191
+ model_name: openai/gpt-3.5-turbo-instruct
1192
+ tokenizer_name: openai/cl100k_base
1193
+ max_sequence_length: 4096
1194
+ max_request_length: 4097
1195
+ client_spec:
1196
+ class_name: "helm.clients.openai_client.OpenAIClient"
1197
+
655
1198
  # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
656
1199
  # sequence length is smaller at 4087 with one user input message and one assistant
657
1200
  # output message because ChatGPT uses special tokens for message roles and boundaries.
@@ -662,8 +1205,7 @@ model_deployments:
662
1205
  max_sequence_length: 4000
663
1206
  max_request_length: 4001
664
1207
  client_spec:
665
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
666
- args: {}
1208
+ class_name: "helm.clients.openai_client.OpenAIClient"
667
1209
 
668
1210
  # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
669
1211
  # sequence length is smaller at 4087 with one user input message and one assistant
@@ -675,8 +1217,7 @@ model_deployments:
675
1217
  max_sequence_length: 4000
676
1218
  max_request_length: 4001
677
1219
  client_spec:
678
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
679
- args: {}
1220
+ class_name: "helm.clients.openai_client.OpenAIClient"
680
1221
 
681
1222
  # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
682
1223
  # in the openai/gpt-3.5-turbo-0613 comment
@@ -686,9 +1227,26 @@ model_deployments:
686
1227
  max_sequence_length: 16000
687
1228
  max_request_length: 16001
688
1229
  client_spec:
689
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
690
- args: {}
1230
+ class_name: "helm.clients.openai_client.OpenAIClient"
1231
+
1232
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
1233
+ # in the openai/gpt-3.5-turbo-0613 comment
1234
+ - name: openai/gpt-3.5-turbo-1106
1235
+ model_name: openai/gpt-3.5-turbo-1106
1236
+ tokenizer_name: openai/cl100k_base
1237
+ max_sequence_length: 16000
1238
+ max_request_length: 16001
1239
+ client_spec:
1240
+ class_name: "helm.clients.openai_client.OpenAIClient"
691
1241
 
1242
+ # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
1243
+ # in the openai/gpt-3.5-turbo-0613 comment
1244
+ - name: openai/gpt-3.5-turbo-0125
1245
+ model_name: openai/gpt-3.5-turbo-0125
1246
+ tokenizer_name: openai/cl100k_base
1247
+ max_sequence_length: 16000
1248
+ client_spec:
1249
+ class_name: "helm.clients.openai_client.OpenAIClient"
692
1250
 
693
1251
  ## GPT 4 Models
694
1252
 
@@ -701,8 +1259,7 @@ model_deployments:
701
1259
  max_sequence_length: 128000
702
1260
  max_request_length: 128001
703
1261
  client_spec:
704
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
705
- args: {}
1262
+ class_name: "helm.clients.openai_client.OpenAIClient"
706
1263
 
707
1264
  - name: openai/gpt-4-0314
708
1265
  model_name: openai/gpt-4-0314
@@ -710,8 +1267,7 @@ model_deployments:
710
1267
  max_sequence_length: 8192
711
1268
  max_request_length: 8193
712
1269
  client_spec:
713
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
714
- args: {}
1270
+ class_name: "helm.clients.openai_client.OpenAIClient"
715
1271
 
716
1272
  - name: openai/gpt-4-32k-0314
717
1273
  model_name: openai/gpt-4-32k-0314
@@ -719,8 +1275,7 @@ model_deployments:
719
1275
  max_sequence_length: 32768
720
1276
  max_request_length: 32769
721
1277
  client_spec:
722
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
723
- args: {}
1278
+ class_name: "helm.clients.openai_client.OpenAIClient"
724
1279
 
725
1280
  - name: openai/gpt-4-0613
726
1281
  model_name: openai/gpt-4-0613
@@ -728,8 +1283,7 @@ model_deployments:
728
1283
  max_sequence_length: 8192
729
1284
  max_request_length: 8193
730
1285
  client_spec:
731
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
732
- args: {}
1286
+ class_name: "helm.clients.openai_client.OpenAIClient"
733
1287
 
734
1288
  - name: openai/gpt-4-32k-0613
735
1289
  model_name: openai/gpt-4-32k-0613
@@ -737,9 +1291,34 @@ model_deployments:
737
1291
  max_sequence_length: 32768
738
1292
  max_request_length: 32769
739
1293
  client_spec:
740
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
741
- args: {}
1294
+ class_name: "helm.clients.openai_client.OpenAIClient"
1295
+
1296
+ - name: openai/gpt-4-0125-preview
1297
+ model_name: openai/gpt-4-0125-preview
1298
+ tokenizer_name: openai/cl100k_base
1299
+ # According to https://help.openai.com/en/articles/8555510-gpt-4-turbo,
1300
+ # the maximum number of output tokens for this model is 4096
1301
+ # TODO: add max_generated_tokens_length of 4096 https://github.com/stanford-crfm/helm/issues/2098
1302
+ max_sequence_length: 128000
1303
+ max_request_length: 128001
1304
+ client_spec:
1305
+ class_name: "helm.clients.openai_client.OpenAIClient"
1306
+
1307
+ - name: openai/gpt-4-turbo-2024-04-09
1308
+ model_name: openai/gpt-4-turbo-2024-04-09
1309
+ tokenizer_name: openai/cl100k_base
1310
+ max_sequence_length: 128000
1311
+ client_spec:
1312
+ class_name: "helm.clients.openai_client.OpenAIClient"
742
1313
 
1314
+ - name: openai/gpt-4-vision-preview
1315
+ model_name: openai/gpt-4-vision-preview
1316
+ tokenizer_name: openai/cl100k_base
1317
+ max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
1318
+ max_request_length: 128001
1319
+ max_sequence_and_generated_tokens_length: 132096
1320
+ client_spec:
1321
+ class_name: "helm.clients.openai_client.OpenAIClient"
743
1322
 
744
1323
  ## Codex Models
745
1324
  # DEPRECATED: Codex models have been shut down on March 23 2023.
@@ -751,8 +1330,7 @@ model_deployments:
751
1330
  max_sequence_length: 4000
752
1331
  max_request_length: 4001
753
1332
  client_spec:
754
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
755
- args: {}
1333
+ class_name: "helm.clients.openai_client.OpenAIClient"
756
1334
 
757
1335
  - name: openai/code-davinci-001
758
1336
  deprecated: true
@@ -761,8 +1339,7 @@ model_deployments:
761
1339
  max_sequence_length: 2048
762
1340
  max_request_length: 2049
763
1341
  client_spec:
764
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
765
- args: {}
1342
+ class_name: "helm.clients.openai_client.OpenAIClient"
766
1343
 
767
1344
  - name: openai/code-cushman-001
768
1345
  deprecated: true
@@ -771,10 +1348,8 @@ model_deployments:
771
1348
  max_sequence_length: 2048
772
1349
  max_request_length: 2049
773
1350
  client_spec:
774
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
775
- args: {}
1351
+ class_name: "helm.clients.openai_client.OpenAIClient"
776
1352
 
777
-
778
1353
  ## Text Similarity Models
779
1354
  # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
780
1355
  # The number of parameters is guessed based on the number of parameters of the
@@ -789,8 +1364,7 @@ model_deployments:
789
1364
  max_sequence_length: 2048
790
1365
  max_request_length: 2049
791
1366
  client_spec:
792
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
793
- args: {}
1367
+ class_name: "helm.clients.openai_client.OpenAIClient"
794
1368
 
795
1369
  - name: openai/text-similarity-curie-001
796
1370
  deprecated: true
@@ -799,8 +1373,7 @@ model_deployments:
799
1373
  max_sequence_length: 2048
800
1374
  max_request_length: 2049
801
1375
  client_spec:
802
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
803
- args: {}
1376
+ class_name: "helm.clients.openai_client.OpenAIClient"
804
1377
 
805
1378
  - name: openai/text-similarity-babbage-001
806
1379
  deprecated: true
@@ -809,8 +1382,7 @@ model_deployments:
809
1382
  max_sequence_length: 2048
810
1383
  max_request_length: 2049
811
1384
  client_spec:
812
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
813
- args: {}
1385
+ class_name: "helm.clients.openai_client.OpenAIClient"
814
1386
 
815
1387
  - name: openai/text-similarity-ada-001
816
1388
  deprecated: true
@@ -819,8 +1391,7 @@ model_deployments:
819
1391
  max_sequence_length: 2048
820
1392
  max_request_length: 2049
821
1393
  client_spec:
822
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
823
- args: {}
1394
+ class_name: "helm.clients.openai_client.OpenAIClient"
824
1395
 
825
1396
  # As of 2023-11-07, text-embedding-ada-002 is not deprecated:
826
1397
  # "We recommend using text-embedding-ada-002 for nearly all use cases."
@@ -831,10 +1402,53 @@ model_deployments:
831
1402
  max_sequence_length: 2048
832
1403
  max_request_length: 2049
833
1404
  client_spec:
834
- class_name: "helm.proxy.clients.openai_client.OpenAIClient"
835
- args: {}
1405
+ class_name: "helm.clients.openai_client.OpenAIClient"
1406
+
1407
+ # Text-to-image models
1408
+ - name: openai/dall-e-2
1409
+ model_name: openai/dall-e-2
1410
+ tokenizer_name: openai/clip-vit-large-patch14
1411
+ max_sequence_length: 1000
1412
+ client_spec:
1413
+ class_name: "helm.clients.image_generation.dalle2_client.DALLE2Client"
1414
+ window_service_spec:
1415
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
1416
+
1417
+ - name: openai/dall-e-3
1418
+ model_name: openai/dall-e-3
1419
+ tokenizer_name: openai/clip-vit-large-patch14
1420
+ max_sequence_length: 1000
1421
+ client_spec:
1422
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1423
+ window_service_spec:
1424
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
836
1425
 
1426
+ - name: openai/dall-e-3-natural
1427
+ model_name: openai/dall-e-3-natural
1428
+ tokenizer_name: openai/clip-vit-large-patch14
1429
+ max_sequence_length: 1000
1430
+ client_spec:
1431
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1432
+ window_service_spec:
1433
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
1434
+
1435
+ - name: openai/dall-e-3-hd
1436
+ model_name: openai/dall-e-3-hd
1437
+ tokenizer_name: openai/clip-vit-large-patch14
1438
+ max_sequence_length: 1000
1439
+ client_spec:
1440
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1441
+ window_service_spec:
1442
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
837
1443
 
1444
+ - name: openai/dall-e-3-hd-natural
1445
+ model_name: openai/dall-e-3-hd-natural
1446
+ tokenizer_name: openai/clip-vit-large-patch14
1447
+ max_sequence_length: 1000
1448
+ client_spec:
1449
+ class_name: "helm.clients.image_generation.dalle3_client.DALLE3Client"
1450
+ window_service_spec:
1451
+ class_name: "helm.benchmark.window_services.image_generation.openai_dalle_window_service.OpenAIDALLEWindowService"
838
1452
 
839
1453
  # Together
840
1454
  # The list of models served by Together changes often, to check the latest list, visit:
@@ -844,275 +1458,136 @@ model_deployments:
844
1458
 
845
1459
  ## BigScience
846
1460
  - name: together/bloom
847
- deprecated: true # Removed from together
1461
+ deprecated: true # Removed from Together
848
1462
  model_name: bigscience/bloom
849
1463
  tokenizer_name: bigscience/bloom
850
1464
  max_sequence_length: 2048
851
1465
  max_request_length: 2049
852
1466
  client_spec:
853
- class_name: "helm.proxy.clients.together_client.TogetherClient"
854
- args: {}
1467
+ class_name: "helm.clients.together_client.TogetherClient"
855
1468
 
856
1469
  - name: together/t0pp
857
- deprecated: true # Removed from together
1470
+ deprecated: true # Removed from Together
858
1471
  model_name: bigscience/t0pp
859
1472
  tokenizer_name: bigscience/T0pp
860
1473
  max_sequence_length: 1024
861
1474
  client_spec:
862
- class_name: "helm.proxy.clients.together_client.TogetherClient"
863
- args: {}
1475
+ class_name: "helm.clients.together_client.TogetherClient"
864
1476
  window_service_spec:
865
- class_name: "helm.benchmark.window_services.t0pp_window_service.T0ppWindowService"
866
- args: {}
867
-
868
- ## Databricks
869
- - name: together/dolly-v2-3b
870
- model_name: databricks/dolly-v2-3b
871
- tokenizer_name: EleutherAI/gpt-neox-20b
872
- max_sequence_length: 2048
873
- max_request_length: 2049
874
- client_spec:
875
- class_name: "helm.proxy.clients.together_client.TogetherClient"
876
- args: {}
877
-
878
- - name: together/dolly-v2-7b
879
- model_name: databricks/dolly-v2-7b
880
- tokenizer_name: EleutherAI/gpt-neox-20b
881
- max_sequence_length: 2048
882
- max_request_length: 2049
883
- client_spec:
884
- class_name: "helm.proxy.clients.together_client.TogetherClient"
885
- args: {}
886
-
887
- - name: together/dolly-v2-12b
888
- model_name: databricks/dolly-v2-12b
889
- tokenizer_name: EleutherAI/gpt-neox-20b
890
- max_sequence_length: 2048
891
- max_request_length: 2049
892
- client_spec:
893
- class_name: "helm.proxy.clients.together_client.TogetherClient"
894
- args: {}
895
-
896
- ## EleutherAI
897
- - name: together/gpt-j-6b
898
- deprecated: true # Removed from together
899
- model_name: eleutherai/gpt-j-6b
900
- tokenizer_name: EleutherAI/gpt-j-6B
901
- max_sequence_length: 2048
902
- max_request_length: 2049
903
- client_spec:
904
- class_name: "helm.proxy.clients.together_client.TogetherClient"
905
- args: {}
906
-
907
- - name: together/gpt-neox-20b
908
- deprecated: true # Removed from together
909
- model_name: eleutherai/gpt-neox-20b
910
- tokenizer_name: EleutherAI/gpt-neox-20b
911
- max_sequence_length: 2048
912
- max_request_length: 2049
913
- client_spec:
914
- class_name: "helm.proxy.clients.together_client.TogetherClient"
915
- args: {}
916
-
917
- - name: together/pythia-1b-v0
918
- model_name: eleutherai/pythia-1b-v0
919
- tokenizer_name: EleutherAI/gpt-neox-20b
920
- max_sequence_length: 2048
921
- max_request_length: 2049
922
- client_spec:
923
- class_name: "helm.proxy.clients.together_client.TogetherClient"
924
- args: {}
925
-
926
- - name: together/pythia-2.8b-v0
927
- model_name: eleutherai/pythia-2.8b-v0
928
- tokenizer_name: EleutherAI/gpt-neox-20b
929
- max_sequence_length: 2048
930
- max_request_length: 2049
931
- client_spec:
932
- class_name: "helm.proxy.clients.together_client.TogetherClient"
933
- args: {}
934
-
935
- - name: together/pythia-6.9b
936
- model_name: eleutherai/pythia-6.9b
937
- tokenizer_name: EleutherAI/gpt-neox-20b
938
- max_sequence_length: 2048
939
- max_request_length: 2049
940
- client_spec:
941
- class_name: "helm.proxy.clients.together_client.TogetherClient"
942
- args: {}
943
-
944
- - name: together/pythia-12b-v0
945
- model_name: eleutherai/pythia-12b-v0
946
- tokenizer_name: EleutherAI/gpt-neox-20b
947
- max_sequence_length: 2048
948
- max_request_length: 2049
949
- client_spec:
950
- class_name: "helm.proxy.clients.together_client.TogetherClient"
951
- args: {}
1477
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
952
1478
 
953
1479
  ## Google
954
1480
  - name: together/t5-11b
955
- deprecated: true # Removed from together
1481
+ deprecated: true # Removed from Together
956
1482
  model_name: google/t5-11b
957
1483
  tokenizer_name: google/t5-11b
958
1484
  max_sequence_length: 511
959
1485
  client_spec:
960
- class_name: "helm.proxy.clients.together_client.TogetherClient"
961
- args: {}
1486
+ class_name: "helm.clients.together_client.TogetherClient"
962
1487
  window_service_spec:
963
- class_name: "helm.benchmark.window_services.t511b_window_service.T511bWindowService"
964
- args: {}
1488
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
965
1489
 
966
1490
  - name: together/flan-t5-xxl
967
- deprecated: true # Removed from together
1491
+ deprecated: true # Removed from Together
968
1492
  model_name: google/flan-t5-xxl
969
1493
  tokenizer_name: google/flan-t5-xxl
970
1494
  max_sequence_length: 511
971
1495
  client_spec:
972
- class_name: "helm.proxy.clients.together_client.TogetherClient"
973
- args: {}
1496
+ class_name: "helm.clients.together_client.TogetherClient"
974
1497
  window_service_spec:
975
- class_name: "helm.benchmark.window_services.flan_t5_window_service.FlanT5WindowService"
976
- args: {}
1498
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
977
1499
 
978
1500
  - name: together/ul2
979
- deprecated: true # Removed from together
1501
+ deprecated: true # Removed from Together
980
1502
  model_name: google/ul2
981
1503
  tokenizer_name: google/ul2
982
1504
  max_sequence_length: 511
983
1505
  client_spec:
984
- class_name: "helm.proxy.clients.together_client.TogetherClient"
985
- args: {}
1506
+ class_name: "helm.clients.together_client.TogetherClient"
986
1507
  window_service_spec:
987
- class_name: "helm.benchmark.window_services.ul2_window_service.UL2WindowService"
988
- args: {}
989
-
990
- ## HazyResearch
991
- - name: together/h3-2.7b
992
- deprecated: true # Not available on Together yet
993
- model_name: hazyresearch/h3-2.7b
994
- tokenizer_name: huggingface/gpt2
995
- max_sequence_length: 1024
996
- max_request_length: 1025
997
- client_spec:
998
- class_name: "helm.proxy.clients.together_client.TogetherClient"
999
- args: {}
1000
-
1001
- ## LMSYS
1002
- # TODO: might be deprecated. Needs to be checked.
1003
- # Together officialy supports vicuna 1.5, not sure if 1.3 is still supported.
1004
- - name: together/vicuna-7b-v1.3
1005
- model_name: lmsys/vicuna-7b-v1.3
1006
- tokenizer_name: hf-internal-testing/llama-tokenizer
1007
- max_sequence_length: 2048
1008
- client_spec:
1009
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1010
- args: {}
1011
-
1012
- - name: together/vicuna-13b-v1.3
1013
- model_name: lmsys/vicuna-13b-v1.3
1014
- tokenizer_name: hf-internal-testing/llama-tokenizer
1015
- max_sequence_length: 2048
1016
- client_spec:
1017
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1018
- args: {}
1508
+ class_name: "helm.benchmark.window_services.encoder_decoder_window_service.EncoderDecoderWindowService"
1019
1509
 
1020
1510
  ## Meta
1021
1511
  - name: together/llama-7b
1022
1512
  model_name: meta/llama-7b
1023
1513
  tokenizer_name: hf-internal-testing/llama-tokenizer
1024
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1514
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1025
1515
  client_spec:
1026
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1027
- args: {}
1516
+ class_name: "helm.clients.together_client.TogetherClient"
1517
+ args:
1518
+ together_model: huggyllama/llama-7b
1028
1519
 
1029
1520
  - name: together/llama-13b
1030
1521
  model_name: meta/llama-13b
1031
1522
  tokenizer_name: hf-internal-testing/llama-tokenizer
1032
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1523
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1033
1524
  client_spec:
1034
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1035
- args: {}
1525
+ class_name: "helm.clients.together_client.TogetherClient"
1526
+ args:
1527
+ together_model: huggyllama/llama-13b
1036
1528
 
1037
1529
  - name: together/llama-30b
1038
1530
  model_name: meta/llama-30b
1039
1531
  tokenizer_name: hf-internal-testing/llama-tokenizer
1040
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1532
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1041
1533
  client_spec:
1042
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1043
- args: {}
1534
+ class_name: "helm.clients.together_client.TogetherClient"
1535
+ args:
1536
+ together_model: huggyllama/llama-30b
1044
1537
 
1045
1538
  - name: together/llama-65b
1046
1539
  model_name: meta/llama-65b
1047
1540
  tokenizer_name: hf-internal-testing/llama-tokenizer
1048
- max_sequence_length: 2047 # Subtract 1 tokens to work around a off-by-one bug in Together's input validation token counting (#2080)
1541
+ max_sequence_length: 2047 # Subtract 1 tokens to work around a off-by-one bug in Together's input validation token counting (#2080)
1049
1542
  client_spec:
1050
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1051
- args: {}
1543
+ class_name: "helm.clients.together_client.TogetherClient"
1544
+ args:
1545
+ together_model: huggyllama/llama-65b
1052
1546
 
1053
1547
  - name: together/llama-2-7b
1054
1548
  model_name: meta/llama-2-7b
1055
1549
  tokenizer_name: meta-llama/Llama-2-7b-hf
1056
- max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
1550
+ max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
1057
1551
  client_spec:
1058
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1059
- args: {}
1552
+ class_name: "helm.clients.together_client.TogetherClient"
1553
+ args:
1554
+ together_model: togethercomputer/llama-2-7b
1060
1555
 
1061
1556
  - name: together/llama-2-13b
1062
1557
  model_name: meta/llama-2-13b
1063
1558
  tokenizer_name: meta-llama/Llama-2-7b-hf
1064
- max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
1559
+ max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
1065
1560
  client_spec:
1066
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1067
- args: {}
1561
+ class_name: "helm.clients.together_client.TogetherClient"
1562
+ args:
1563
+ together_model: togethercomputer/llama-2-13b
1068
1564
 
1069
1565
  - name: together/llama-2-70b
1070
1566
  model_name: meta/llama-2-70b
1071
1567
  tokenizer_name: meta-llama/Llama-2-7b-hf
1072
- max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
1073
- client_spec:
1074
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1075
- args: {}
1076
-
1077
- - name: together/opt-175b
1078
- deprecated: true # Not available on Together yet
1079
- model_name: meta/opt-175b
1080
- tokenizer_name: facebook/opt-66b
1081
- max_sequence_length: 2048
1082
- max_request_length: 2049
1083
- client_spec:
1084
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1085
- args: {}
1086
-
1087
- - name: together/opt-66b
1088
- deprecated: true # Not available on Together yet
1089
- model_name: meta/opt-66b
1090
- tokenizer_name: facebook/opt-66b
1091
- max_sequence_length: 2048
1092
- max_request_length: 2049
1568
+ max_sequence_length: 4094 # Subtract 2 tokens to work around a off-by-two bug in Together's token counting (#2080 and #2094)
1093
1569
  client_spec:
1094
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1095
- args: {}
1570
+ class_name: "helm.clients.together_client.TogetherClient"
1571
+ args:
1572
+ together_model: togethercomputer/llama-2-70b
1096
1573
 
1097
- - name: together/opt-6.7b
1098
- deprecated: true # Not available on Together yet
1099
- model_name: meta/opt-6.7b
1100
- tokenizer_name: facebook/opt-66b
1101
- max_sequence_length: 2048
1102
- max_request_length: 2049
1574
+ - name: together/llama-3-8b
1575
+ model_name: meta/llama-3-8b
1576
+ tokenizer_name: meta/llama-3-8b
1577
+ max_sequence_length: 8191
1103
1578
  client_spec:
1104
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1105
- args: {}
1579
+ class_name: "helm.clients.together_client.TogetherClient"
1580
+ args:
1581
+ together_model: meta-llama/Meta-Llama-3-8B
1106
1582
 
1107
- - name: together/opt-1.3b
1108
- deprecated: true # Not available on Together yet
1109
- model_name: meta/opt-1.3b
1110
- tokenizer_name: facebook/opt-66b
1111
- max_sequence_length: 2048
1112
- max_request_length: 2049
1583
+ - name: together/llama-3-70b
1584
+ model_name: meta/llama-3-70b
1585
+ tokenizer_name: meta/llama-3-8b
1586
+ max_sequence_length: 8191
1113
1587
  client_spec:
1114
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1115
- args: {}
1588
+ class_name: "helm.clients.together_client.TogetherClient"
1589
+ args:
1590
+ together_model: meta-llama/Meta-Llama-3-70B
1116
1591
 
1117
1592
  # 01.AI
1118
1593
  - name: together/yi-6b
@@ -1120,93 +1595,75 @@ model_deployments:
1120
1595
  tokenizer_name: 01-ai/Yi-6B
1121
1596
  max_sequence_length: 4095
1122
1597
  client_spec:
1123
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1124
- args: {}
1598
+ class_name: "helm.clients.together_client.TogetherClient"
1599
+ args:
1600
+ together_model: zero-one-ai/Yi-6B
1125
1601
 
1126
1602
  - name: together/yi-34b
1127
1603
  model_name: 01-ai/yi-34b
1128
1604
  tokenizer_name: 01-ai/Yi-6B
1129
1605
  max_sequence_length: 4095
1130
1606
  client_spec:
1131
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1132
- args: {}
1607
+ class_name: "helm.clients.together_client.TogetherClient"
1608
+ args:
1609
+ together_model: zero-one-ai/Yi-34B
1133
1610
 
1134
- ## MistralAI
1135
- - name: together/mistral-7b-v0.1
1136
- model_name: mistralai/mistral-7b-v0.1
1137
- tokenizer_name: mistralai/Mistral-7B-v0.1
1138
- max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1139
- client_spec:
1140
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1141
- args: {}
1142
1611
 
1143
- - name: together/mixtral-8x7b-32kseqlen
1144
- model_name: mistralai/mixtral-8x7b-32kseqlen
1145
- tokenizer_name: mistralai/Mistral-7B-v0.1
1146
- max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1612
+ # Allen Institute for AI
1613
+ - name: together/olmo-7b
1614
+ model_name: allenai/olmo-7b
1615
+ tokenizer_name: allenai/olmo-7b
1616
+ max_sequence_length: 2047
1147
1617
  client_spec:
1148
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1149
- args: {}
1618
+ class_name: "helm.clients.together_client.TogetherClient"
1150
1619
 
1151
- ## MosaicML
1152
- - name: together/mpt-7b
1153
- deprecated: true # Not available on Together yet
1154
- model_name: mosaicml/mpt-7b
1155
- tokenizer_name: EleutherAI/gpt-neox-20b
1156
- max_sequence_length: 2048
1157
- max_request_length: 2049
1620
+ - name: together/olmo-7b-twin-2t
1621
+ model_name: allenai/olmo-7b-twin-2t
1622
+ tokenizer_name: allenai/olmo-7b
1623
+ max_sequence_length: 2047
1158
1624
  client_spec:
1159
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1160
- args: {}
1625
+ class_name: "helm.clients.together_client.TogetherClient"
1161
1626
 
1162
- - name: together/mpt-instruct-7b
1163
- deprecated: true # Not available on Together yet
1164
- model_name: mosaicml/mpt-instruct-7b
1165
- tokenizer_name: EleutherAI/gpt-neox-20b
1166
- max_sequence_length: 2048
1167
- max_request_length: 2049
1627
+ - name: together/olmo-7b-instruct
1628
+ model_name: allenai/olmo-7b-instruct
1629
+ tokenizer_name: allenai/olmo-7b
1630
+ max_sequence_length: 2047
1168
1631
  client_spec:
1169
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1170
- args: {}
1632
+ class_name: "helm.clients.together_client.TogetherClient"
1171
1633
 
1172
- - name: together/mpt-30b
1173
- model_name: mosaicml/mpt-30b
1174
- tokenizer_name: EleutherAI/gpt-neox-20b
1175
- max_sequence_length: 2048
1176
- max_request_length: 2049
1634
+
1635
+ ## MistralAI
1636
+ - name: together/mistral-7b-v0.1
1637
+ model_name: mistralai/mistral-7b-v0.1
1638
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1639
+ max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1177
1640
  client_spec:
1178
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1179
- args: {}
1641
+ class_name: "helm.clients.together_client.TogetherClient"
1642
+ args:
1643
+ together_model: mistralai/Mistral-7B-v0.1
1180
1644
 
1181
- - name: together/mpt-instruct-30b
1182
- model_name: mosaicml/mpt-instruct-30b
1183
- tokenizer_name: EleutherAI/gpt-neox-20b
1184
- max_sequence_length: 2048
1185
- max_request_length: 2049
1645
+ - name: together/mixtral-8x7b-32kseqlen
1646
+ model_name: mistralai/mixtral-8x7b-32kseqlen
1647
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1648
+ max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1186
1649
  client_spec:
1187
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1188
- args: {}
1650
+ class_name: "helm.clients.together_client.TogetherClient"
1651
+ args:
1652
+ together_model: mistralai/mixtral-8x7b-32kseqlen
1189
1653
 
1190
- ## StabilityAI
1191
- - name: together/stablelm-base-alpha-3b
1192
- deprecated: true # Removed from together
1193
- model_name: stabilityai/stablelm-base-alpha-3b
1194
- tokenizer_name: EleutherAI/gpt-neox-20b
1195
- max_sequence_length: 4096
1196
- max_request_length: 4097
1654
+ - name: together/mixtral-8x7b-instruct-v0.1
1655
+ model_name: mistralai/mixtral-8x7b-instruct-v0.1
1656
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1657
+ max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1197
1658
  client_spec:
1198
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1199
- args: {}
1659
+ class_name: "helm.clients.together_client.TogetherClient"
1200
1660
 
1201
- - name: together/stablelm-base-alpha-7b
1202
- deprecated: true # Removed from together
1203
- model_name: stabilityai/stablelm-base-alpha-7b
1204
- tokenizer_name: EleutherAI/gpt-neox-20b
1205
- max_sequence_length: 4096
1206
- max_request_length: 4097
1661
+ - name: together/mixtral-8x22b
1662
+ model_name: mistralai/mixtral-8x22b
1663
+ tokenizer_name: mistralai/Mistral-7B-v0.1
1664
+ max_sequence_length: 65535
1207
1665
  client_spec:
1208
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1209
- args: {}
1666
+ class_name: "helm.clients.together_client.TogetherClient"
1210
1667
 
1211
1668
  ## Stanford
1212
1669
  - name: together/alpaca-7b
@@ -1214,41 +1671,46 @@ model_deployments:
1214
1671
  tokenizer_name: hf-internal-testing/llama-tokenizer
1215
1672
  max_sequence_length: 2048
1216
1673
  client_spec:
1217
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1218
- args: {}
1674
+ class_name: "helm.clients.together_client.TogetherClient"
1675
+ args:
1676
+ together_model: togethercomputer/alpaca-7b
1219
1677
 
1220
1678
  ## Tiiuae
1221
1679
  - name: together/falcon-7b
1222
1680
  model_name: tiiuae/falcon-7b
1223
1681
  tokenizer_name: tiiuae/falcon-7b
1224
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1682
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1225
1683
  client_spec:
1226
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1227
- args: {}
1684
+ class_name: "helm.clients.together_client.TogetherClient"
1685
+ args:
1686
+ together_model: togethercomputer/falcon-7b
1228
1687
 
1229
1688
  - name: together/falcon-7b-instruct
1230
1689
  model_name: tiiuae/falcon-7b-instruct
1231
1690
  tokenizer_name: tiiuae/falcon-7b
1232
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1691
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1233
1692
  client_spec:
1234
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1235
- args: {}
1693
+ class_name: "helm.clients.together_client.TogetherClient"
1694
+ args:
1695
+ together_model: togethercomputer/falcon-7b-instruct
1236
1696
 
1237
1697
  - name: together/falcon-40b
1238
1698
  model_name: tiiuae/falcon-40b
1239
1699
  tokenizer_name: tiiuae/falcon-7b
1240
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1700
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1241
1701
  client_spec:
1242
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1243
- args: {}
1702
+ class_name: "helm.clients.together_client.TogetherClient"
1703
+ args:
1704
+ together_model: togethercomputer/falcon-40b
1244
1705
 
1245
1706
  - name: together/falcon-40b-instruct
1246
1707
  model_name: tiiuae/falcon-40b-instruct
1247
1708
  tokenizer_name: tiiuae/falcon-7b
1248
- max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1709
+ max_sequence_length: 2047 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
1249
1710
  client_spec:
1250
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1251
- args: {}
1711
+ class_name: "helm.clients.together_client.TogetherClient"
1712
+ args:
1713
+ together_model: togethercomputer/falcon-40b-instruct
1252
1714
 
1253
1715
  ## Together
1254
1716
  # These are models fine-tuned by Together (and not simply hosted by Together).
@@ -1258,8 +1720,9 @@ model_deployments:
1258
1720
  max_sequence_length: 2048
1259
1721
  max_request_length: 2049
1260
1722
  client_spec:
1261
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1262
- args: {}
1723
+ class_name: "helm.clients.together_client.TogetherClient"
1724
+ args:
1725
+ together_model: togethercomputer/GPT-JT-6B-v1
1263
1726
 
1264
1727
  - name: together/gpt-neoxt-chat-base-20b
1265
1728
  model_name: together/gpt-neoxt-chat-base-20b
@@ -1267,8 +1730,9 @@ model_deployments:
1267
1730
  max_sequence_length: 2048
1268
1731
  max_request_length: 2049
1269
1732
  client_spec:
1270
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1271
- args: {}
1733
+ class_name: "helm.clients.together_client.TogetherClient"
1734
+ args:
1735
+ together_model: togethercomputer/GPT-NeoXT-Chat-Base-20B
1272
1736
 
1273
1737
  - name: together/redpajama-incite-base-3b-v1
1274
1738
  model_name: together/redpajama-incite-base-3b-v1
@@ -1276,8 +1740,9 @@ model_deployments:
1276
1740
  max_sequence_length: 2048
1277
1741
  max_request_length: 2049
1278
1742
  client_spec:
1279
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1280
- args: {}
1743
+ class_name: "helm.clients.together_client.TogetherClient"
1744
+ args:
1745
+ together_model: togethercomputer/RedPajama-INCITE-Base-3B-v1
1281
1746
 
1282
1747
  - name: together/redpajama-incite-instruct-3b-v1
1283
1748
  model_name: together/redpajama-incite-instruct-3b-v1
@@ -1285,8 +1750,9 @@ model_deployments:
1285
1750
  max_sequence_length: 2048
1286
1751
  max_request_length: 2049
1287
1752
  client_spec:
1288
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1289
- args: {}
1753
+ class_name: "helm.clients.together_client.TogetherClient"
1754
+ args:
1755
+ together_model: togethercomputer/RedPajama-INCITE-Instruct-3B-v1
1290
1756
 
1291
1757
  - name: together/redpajama-incite-base-7b
1292
1758
  model_name: together/redpajama-incite-base-7b
@@ -1294,8 +1760,9 @@ model_deployments:
1294
1760
  max_sequence_length: 2048
1295
1761
  max_request_length: 2049
1296
1762
  client_spec:
1297
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1298
- args: {}
1763
+ class_name: "helm.clients.together_client.TogetherClient"
1764
+ args:
1765
+ together_model: togethercomputer/RedPajama-INCITE-7B-Base
1299
1766
 
1300
1767
  - name: together/redpajama-incite-instruct-7b
1301
1768
  model_name: together/redpajama-incite-instruct-7b
@@ -1303,38 +1770,42 @@ model_deployments:
1303
1770
  max_sequence_length: 2048
1304
1771
  max_request_length: 2049
1305
1772
  client_spec:
1306
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1307
- args: {}
1773
+ class_name: "helm.clients.together_client.TogetherClient"
1774
+ args:
1775
+ together_model: togethercomputer/RedPajama-INCITE-7B-Instruct
1308
1776
 
1309
1777
  ## Tsinghua
1310
1778
  - name: together/glm
1311
- deprecated: true # Not available on Together yet
1779
+ deprecated: true # Removed from Together
1312
1780
  model_name: tsinghua/glm
1313
1781
  tokenizer_name: TsinghuaKEG/ice
1314
1782
  max_sequence_length: 2048
1315
1783
  max_request_length: 2049
1316
1784
  client_spec:
1317
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1318
- args: {}
1785
+ class_name: "helm.clients.together_client.TogetherClient"
1319
1786
  window_service_spec:
1320
1787
  class_name: "helm.benchmark.window_services.ice_window_service.ICEWindowService"
1321
- args: {}
1788
+
1789
+ - name: thudm/cogview2
1790
+ model_name: thudm/cogview2
1791
+ tokenizer_name: openai/clip-vit-large-patch14
1792
+ max_sequence_length: 75
1793
+ client_spec:
1794
+ class_name: "helm.clients.image_generation.cogview2_client.CogView2Client"
1795
+ window_service_spec:
1796
+ class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
1322
1797
 
1323
1798
  ## Yandex
1324
1799
  - name: together/yalm
1325
- deprecated: true # Not available on Together yet
1800
+ deprecated: true # Removed from Together
1326
1801
  model_name: yandex/yalm
1327
1802
  tokenizer_name: Yandex/yalm
1328
1803
  max_sequence_length: 2048
1329
1804
  max_request_length: 2049
1330
1805
  client_spec:
1331
- class_name: "helm.proxy.clients.together_client.TogetherClient"
1332
- args: {}
1806
+ class_name: "helm.clients.together_client.TogetherClient"
1333
1807
  window_service_spec:
1334
1808
  class_name: "helm.benchmark.window_services.yalm_window_service.YaLMWindowService"
1335
- args: {}
1336
-
1337
-
1338
1809
 
1339
1810
  # Writer
1340
1811
  - name: writer/palmyra-base
@@ -1343,8 +1814,7 @@ model_deployments:
1343
1814
  max_sequence_length: 2048
1344
1815
  max_sequence_and_generated_tokens_length: 2048
1345
1816
  client_spec:
1346
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1347
- args: {}
1817
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1348
1818
 
1349
1819
  - name: writer/palmyra-large
1350
1820
  model_name: writer/palmyra-large
@@ -1352,8 +1822,7 @@ model_deployments:
1352
1822
  max_sequence_length: 2048
1353
1823
  max_sequence_and_generated_tokens_length: 2048
1354
1824
  client_spec:
1355
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1356
- args: {}
1825
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1357
1826
 
1358
1827
  - name: writer/palmyra-instruct-30
1359
1828
  model_name: writer/palmyra-instruct-30
@@ -1361,8 +1830,7 @@ model_deployments:
1361
1830
  max_sequence_length: 2048
1362
1831
  max_sequence_and_generated_tokens_length: 2048
1363
1832
  client_spec:
1364
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1365
- args: {}
1833
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1366
1834
 
1367
1835
  - name: writer/palmyra-e
1368
1836
  model_name: writer/palmyra-e
@@ -1370,8 +1838,7 @@ model_deployments:
1370
1838
  max_sequence_length: 2048
1371
1839
  max_sequence_and_generated_tokens_length: 2048
1372
1840
  client_spec:
1373
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1374
- args: {}
1841
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1375
1842
 
1376
1843
  - name: writer/silk-road
1377
1844
  model_name: writer/silk-road
@@ -1379,8 +1846,7 @@ model_deployments:
1379
1846
  max_sequence_length: 8192
1380
1847
  max_sequence_and_generated_tokens_length: 8192
1381
1848
  client_spec:
1382
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1383
- args: {}
1849
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1384
1850
 
1385
1851
  - name: writer/palmyra-x
1386
1852
  model_name: writer/palmyra-x
@@ -1388,8 +1854,7 @@ model_deployments:
1388
1854
  max_sequence_length: 8192
1389
1855
  max_sequence_and_generated_tokens_length: 8192
1390
1856
  client_spec:
1391
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1392
- args: {}
1857
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1393
1858
 
1394
1859
  - name: writer/palmyra-x-v2
1395
1860
  model_name: writer/palmyra-x-v2
@@ -1397,8 +1862,7 @@ model_deployments:
1397
1862
  max_sequence_length: 6000
1398
1863
  max_sequence_and_generated_tokens_length: 7024
1399
1864
  client_spec:
1400
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1401
- args: {}
1865
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1402
1866
 
1403
1867
  - name: writer/palmyra-x-v3
1404
1868
  model_name: writer/palmyra-x-v3
@@ -1406,8 +1870,7 @@ model_deployments:
1406
1870
  max_sequence_length: 6000
1407
1871
  max_sequence_and_generated_tokens_length: 7024
1408
1872
  client_spec:
1409
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1410
- args: {}
1873
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1411
1874
 
1412
1875
  - name: writer/palmyra-x-32k
1413
1876
  model_name: writer/palmyra-x-32k
@@ -1415,5 +1878,65 @@ model_deployments:
1415
1878
  max_sequence_length: 28000
1416
1879
  max_sequence_and_generated_tokens_length: 30048
1417
1880
  client_spec:
1418
- class_name: "helm.proxy.clients.palmyra_client.PalmyraClient"
1419
- args: {}
1881
+ class_name: "helm.clients.palmyra_client.PalmyraClient"
1882
+
1883
+ # Qwen
1884
+
1885
+ - name: together/qwen-7b
1886
+ model_name: qwen/qwen-7b
1887
+ tokenizer_name: qwen/qwen-7b
1888
+ max_sequence_length: 8191
1889
+ client_spec:
1890
+ class_name: "helm.clients.together_client.TogetherClient"
1891
+ args:
1892
+ together_model: togethercomputer/Qwen-7B
1893
+
1894
+ - name: together/qwen1.5-7b
1895
+ model_name: qwen/qwen1.5-7b
1896
+ tokenizer_name: qwen/qwen1.5-7b
1897
+ max_sequence_length: 32767
1898
+ client_spec:
1899
+ class_name: "helm.clients.together_client.TogetherClient"
1900
+ args:
1901
+ together_model: Qwen/Qwen1.5-7B
1902
+
1903
+ - name: together/qwen1.5-14b
1904
+ model_name: qwen/qwen1.5-14b
1905
+ tokenizer_name: qwen/qwen1.5-7b
1906
+ max_sequence_length: 32767
1907
+ client_spec:
1908
+ class_name: "helm.clients.together_client.TogetherClient"
1909
+ args:
1910
+ together_model: Qwen/Qwen1.5-14B
1911
+
1912
+ - name: together/qwen1.5-32b
1913
+ model_name: qwen/qwen1.5-32b
1914
+ tokenizer_name: qwen/qwen1.5-7b
1915
+ max_sequence_length: 32767
1916
+ client_spec:
1917
+ class_name: "helm.clients.together_client.TogetherClient"
1918
+ args:
1919
+ together_model: Qwen/Qwen1.5-32B
1920
+
1921
+ - name: together/qwen1.5-72b
1922
+ model_name: qwen/qwen1.5-72b
1923
+ tokenizer_name: qwen/qwen1.5-7b
1924
+ max_sequence_length: 4095
1925
+ client_spec:
1926
+ class_name: "helm.clients.together_client.TogetherClient"
1927
+ args:
1928
+ together_model: Qwen/Qwen1.5-72B
1929
+
1930
+ - name: huggingface/qwen-vl
1931
+ model_name: qwen/qwen-vl
1932
+ tokenizer_name: qwen/qwen-vl
1933
+ max_sequence_length: 8191
1934
+ client_spec:
1935
+ class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"
1936
+
1937
+ - name: huggingface/qwen-vl-chat
1938
+ model_name: qwen/qwen-vl-chat
1939
+ tokenizer_name: qwen/qwen-vl-chat
1940
+ max_sequence_length: 8191
1941
+ client_spec:
1942
+ class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"