crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (236) hide show
  1. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
  2. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
  3. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +32 -31
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  6. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  7. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  8. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  9. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  10. helm/benchmark/annotation/air_bench_annotator.py +64 -0
  11. helm/benchmark/annotation/annotator_factory.py +6 -0
  12. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  13. helm/benchmark/annotation/call_center_annotator.py +247 -0
  14. helm/benchmark/annotation/financebench_annotator.py +79 -0
  15. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  16. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  17. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  18. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  19. helm/benchmark/annotation/live_qa_annotator.py +71 -0
  20. helm/benchmark/annotation/medication_qa_annotator.py +68 -0
  21. helm/benchmark/annotation/model_as_judge.py +45 -0
  22. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  23. helm/benchmark/annotation/xstest_annotator.py +110 -0
  24. helm/benchmark/augmentations/translate_perturbation.py +1 -0
  25. helm/benchmark/huggingface_registration.py +16 -6
  26. helm/benchmark/metrics/air_bench_metrics.py +56 -0
  27. helm/benchmark/metrics/annotation_metrics.py +108 -0
  28. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  29. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  30. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  31. helm/benchmark/metrics/fin_qa_metrics.py +60 -0
  32. helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
  33. helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
  34. helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
  35. helm/benchmark/metrics/live_qa_metrics.py +23 -0
  36. helm/benchmark/metrics/medication_qa_metrics.py +23 -0
  37. helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
  38. helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
  39. helm/benchmark/metrics/safety_metrics.py +57 -0
  40. helm/benchmark/metrics/summac/model_summac.py +3 -3
  41. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  42. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  43. helm/benchmark/metrics/unitxt_metrics.py +20 -10
  44. helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
  45. helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
  46. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  47. helm/benchmark/model_metadata_registry.py +3 -3
  48. helm/benchmark/presentation/schema.py +54 -4
  49. helm/benchmark/presentation/test_run_entry.py +1 -0
  50. helm/benchmark/presentation/test_schema.py +11 -0
  51. helm/benchmark/run.py +31 -2
  52. helm/benchmark/run_expander.py +113 -10
  53. helm/benchmark/run_spec_factory.py +4 -0
  54. helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
  55. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  56. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  57. helm/benchmark/run_specs/classic_run_specs.py +15 -11
  58. helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
  59. helm/benchmark/run_specs/experimental_run_specs.py +85 -0
  60. helm/benchmark/run_specs/finance_run_specs.py +110 -0
  61. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  62. helm/benchmark/run_specs/vlm_run_specs.py +251 -57
  63. helm/benchmark/scenarios/air_bench_scenario.py +50 -0
  64. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  65. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  66. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  67. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  68. helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
  69. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  70. helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
  71. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  72. helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
  73. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  74. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  75. helm/benchmark/scenarios/scenario.py +1 -1
  76. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  77. helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
  78. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  79. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  80. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  81. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  82. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  83. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  84. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  85. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  86. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  87. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  88. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  89. helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
  90. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  91. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  92. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  93. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  94. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  95. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  96. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  97. helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
  98. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
  99. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
  100. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  101. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  102. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  103. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
  104. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  105. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  106. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  107. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  108. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  109. helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
  110. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  111. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  112. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  113. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
  114. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
  115. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  116. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  117. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  118. helm/benchmark/server.py +1 -6
  119. helm/benchmark/static/schema_air_bench.yaml +3149 -0
  120. helm/benchmark/static/schema_bhasa.yaml +709 -0
  121. helm/benchmark/static/schema_call_center.yaml +232 -0
  122. helm/benchmark/static/schema_classic.yaml +3 -59
  123. helm/benchmark/static/schema_cleva.yaml +768 -0
  124. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  125. helm/benchmark/static/schema_ewok.yaml +367 -0
  126. helm/benchmark/static/schema_finance.yaml +189 -0
  127. helm/benchmark/static/schema_image2struct.yaml +588 -0
  128. helm/benchmark/static/schema_instruction_following.yaml +3 -52
  129. helm/benchmark/static/schema_lite.yaml +3 -61
  130. helm/benchmark/static/schema_medical.yaml +255 -0
  131. helm/benchmark/static/schema_mmlu.yaml +3 -61
  132. helm/benchmark/static/schema_safety.yaml +247 -0
  133. helm/benchmark/static/schema_tables.yaml +317 -0
  134. helm/benchmark/static/schema_thai.yaml +244 -0
  135. helm/benchmark/static/schema_unitxt.yaml +3 -61
  136. helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
  137. helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
  138. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  139. helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
  140. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  141. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  142. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  143. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  144. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  145. helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
  146. helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
  147. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  148. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  149. helm/benchmark/static_build/index.html +2 -2
  150. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  151. helm/clients/ai21_client.py +71 -1
  152. helm/clients/anthropic_client.py +50 -28
  153. helm/clients/auto_client.py +11 -0
  154. helm/clients/client.py +24 -7
  155. helm/clients/cohere_client.py +98 -3
  156. helm/clients/huggingface_client.py +79 -19
  157. helm/clients/nvidia_nim_client.py +35 -0
  158. helm/clients/openai_client.py +11 -5
  159. helm/clients/palmyra_client.py +25 -0
  160. helm/clients/perspective_api_client.py +11 -6
  161. helm/clients/reka_client.py +189 -0
  162. helm/clients/test_client.py +7 -9
  163. helm/clients/test_huggingface_client.py +19 -3
  164. helm/clients/test_together_client.py +72 -2
  165. helm/clients/together_client.py +129 -23
  166. helm/clients/vertexai_client.py +62 -18
  167. helm/clients/vision_language/huggingface_vlm_client.py +1 -0
  168. helm/clients/vision_language/open_flamingo_client.py +1 -2
  169. helm/clients/vision_language/paligemma_client.py +146 -0
  170. helm/clients/vision_language/palmyra_vision_client.py +99 -0
  171. helm/clients/yi_client.py +31 -0
  172. helm/common/critique_request.py +10 -1
  173. helm/common/images_utils.py +25 -0
  174. helm/common/mongo_key_value_store.py +2 -1
  175. helm/common/request.py +16 -0
  176. helm/config/model_deployments.yaml +740 -363
  177. helm/config/model_metadata.yaml +824 -128
  178. helm/config/tokenizer_configs.yaml +207 -10
  179. helm/proxy/critique/model_critique_client.py +32 -4
  180. helm/proxy/example_queries.py +14 -21
  181. helm/proxy/services/server_service.py +2 -3
  182. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  183. helm/tokenizers/ai21_tokenizer.py +51 -59
  184. helm/tokenizers/auto_tokenizer.py +1 -1
  185. helm/tokenizers/cohere_tokenizer.py +29 -62
  186. helm/tokenizers/huggingface_tokenizer.py +35 -13
  187. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  188. helm/tokenizers/test_cohere_tokenizer.py +39 -0
  189. helm/tokenizers/test_huggingface_tokenizer.py +5 -1
  190. helm/benchmark/static/benchmarking.css +0 -156
  191. helm/benchmark/static/benchmarking.js +0 -1705
  192. helm/benchmark/static/config.js +0 -3
  193. helm/benchmark/static/general.js +0 -122
  194. helm/benchmark/static/images/crfm-logo.png +0 -0
  195. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  196. helm/benchmark/static/images/helm-logo.png +0 -0
  197. helm/benchmark/static/images/language-model-helm.png +0 -0
  198. helm/benchmark/static/images/organizations/ai21.png +0 -0
  199. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  200. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  201. helm/benchmark/static/images/organizations/cohere.png +0 -0
  202. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  203. helm/benchmark/static/images/organizations/google.png +0 -0
  204. helm/benchmark/static/images/organizations/meta.png +0 -0
  205. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  206. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  207. helm/benchmark/static/images/organizations/openai.png +0 -0
  208. helm/benchmark/static/images/organizations/together.png +0 -0
  209. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  210. helm/benchmark/static/images/organizations/yandex.png +0 -0
  211. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  212. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  213. helm/benchmark/static/index.html +0 -68
  214. helm/benchmark/static/info-icon.png +0 -0
  215. helm/benchmark/static/json-urls.js +0 -69
  216. helm/benchmark/static/plot-captions.js +0 -27
  217. helm/benchmark/static/schema_image2structure.yaml +0 -304
  218. helm/benchmark/static/utils.js +0 -285
  219. helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
  220. helm/benchmark/static_build/assets/index-878a1094.css +0 -1
  221. helm/benchmark/window_services/ai21_window_service.py +0 -247
  222. helm/benchmark/window_services/cohere_window_service.py +0 -101
  223. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  224. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  225. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  226. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  227. helm/tokenizers/ice_tokenizer.py +0 -30
  228. helm/tokenizers/test_ice_tokenizer.py +0 -57
  229. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  230. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  231. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  232. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  233. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  234. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  235. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  236. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -28,74 +28,47 @@ model_deployments:
28
28
 
29
29
  # AI21 Labs
30
30
 
31
- # J1 models are Deprecated by AI21 Labs
32
- # API returns: Detail: Jurassic J1 models are deprecated
33
- - name: ai21/j1-jumbo
34
- deprecated: true
35
- model_name: ai21/j1-jumbo
36
- tokenizer_name: ai21/j1
37
- max_sequence_length: 2047
38
- client_spec:
39
- class_name: "helm.clients.ai21_client.AI21Client"
40
- window_service_spec:
41
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
42
-
43
- - name: ai21/j1-large
44
- deprecated: true
45
- model_name: ai21/j1-large
46
- tokenizer_name: ai21/j1
47
- max_sequence_length: 2047
48
- client_spec:
49
- class_name: "helm.clients.ai21_client.AI21Client"
50
- window_service_spec:
51
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
52
-
53
- - name: ai21/j1-grande
54
- deprecated: true
55
- model_name: ai21/j1-grande
56
- tokenizer_name: ai21/j1
31
+ - name: ai21/j2-large
32
+ model_name: ai21/j2-large
33
+ tokenizer_name: ai21/j2-tokenizer
57
34
  max_sequence_length: 2047
58
35
  client_spec:
59
36
  class_name: "helm.clients.ai21_client.AI21Client"
60
- window_service_spec:
61
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
62
37
 
63
- - name: ai21/j1-grande-v2-beta
64
- deprecated: true
65
- model_name: ai21/j1-grande-v2-beta
66
- tokenizer_name: ai21/j1
38
+ - name: ai21/j2-grande
39
+ model_name: ai21/j2-grande
40
+ tokenizer_name: ai21/j2-tokenizer
67
41
  max_sequence_length: 2047
68
42
  client_spec:
69
43
  class_name: "helm.clients.ai21_client.AI21Client"
70
- window_service_spec:
71
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
72
44
 
73
45
  - name: ai21/j2-jumbo
74
46
  model_name: ai21/j2-jumbo
75
- tokenizer_name: ai21/j1
47
+ tokenizer_name: ai21/j2-tokenizer
76
48
  max_sequence_length: 6000
77
49
  client_spec:
78
50
  class_name: "helm.clients.ai21_client.AI21Client"
79
- window_service_spec:
80
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
81
51
 
82
- - name: ai21/j2-large
83
- model_name: ai21/j2-large
84
- tokenizer_name: ai21/j1
85
- max_sequence_length: 2047
52
+ - name: ai21/jamba-instruct
53
+ model_name: ai21/jamba-instruct
54
+ tokenizer_name: ai21/jamba-instruct-tokenizer
55
+ max_sequence_length: 256000
86
56
  client_spec:
87
- class_name: "helm.clients.ai21_client.AI21Client"
88
- window_service_spec:
89
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
57
+ class_name: "helm.clients.ai21_client.AI21ChatClient"
90
58
 
91
- - name: ai21/j2-grande
92
- model_name: ai21/j2-grande
93
- tokenizer_name: ai21/j1
94
- max_sequence_length: 2047
59
+ - name: ai21/jamba-1.5-mini
60
+ model_name: ai21/jamba-1.5-mini
61
+ tokenizer_name: ai21/jamba-1.5-mini-tokenizer
62
+ max_sequence_length: 256000
95
63
  client_spec:
96
- class_name: "helm.clients.ai21_client.AI21Client"
97
- window_service_spec:
98
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
64
+ class_name: "helm.clients.ai21_client.AI21ChatClient"
65
+
66
+ - name: ai21/jamba-1.5-large
67
+ model_name: ai21/jamba-1.5-large
68
+ tokenizer_name: ai21/jamba-1.5-large-tokenizer
69
+ max_sequence_length: 256000
70
+ client_spec:
71
+ class_name: "helm.clients.ai21_client.AI21ChatClient"
99
72
 
100
73
  # Aleph Alpha
101
74
  - name: AlephAlpha/luminous-base
@@ -216,6 +189,13 @@ model_deployments:
216
189
  client_spec:
217
190
  class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
218
191
 
192
+ - name: anthropic/claude-3-5-sonnet-20240620
193
+ model_name: anthropic/claude-3-5-sonnet-20240620
194
+ tokenizer_name: anthropic/claude
195
+ max_sequence_length: 200000
196
+ client_spec:
197
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
198
+
219
199
  - name: anthropic/stanford-online-all-v4-s3
220
200
  deprecated: true # Closed model, not accessible via API
221
201
  model_name: anthropic/stanford-online-all-v4-s3
@@ -225,105 +205,40 @@ model_deployments:
225
205
  class_name: "helm.clients.anthropic_client.AnthropicLegacyClient"
226
206
 
227
207
  # Cohere
228
- - name: cohere/xlarge-20220609
229
- model_name: cohere/xlarge-20220609
230
- tokenizer_name: cohere/cohere
231
- max_sequence_length: 2047
232
- max_request_length: 2048
233
- client_spec:
234
- class_name: "helm.clients.cohere_client.CohereClient"
235
- window_service_spec:
236
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
237
-
238
- - name: cohere/large-20220720
239
- model_name: cohere/large-20220720
240
- tokenizer_name: cohere/cohere
241
- max_sequence_length: 2047
242
- max_request_length: 2048
243
- client_spec:
244
- class_name: "helm.clients.cohere_client.CohereClient"
245
- window_service_spec:
246
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
247
-
248
- - name: cohere/medium-20220720
249
- model_name: cohere/medium-20220720
250
- tokenizer_name: cohere/cohere
251
- max_sequence_length: 2047
252
- max_request_length: 2048
253
- client_spec:
254
- class_name: "helm.clients.cohere_client.CohereClient"
255
- window_service_spec:
256
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
257
-
258
- - name: cohere/small-20220720
259
- model_name: cohere/small-20220720
260
- tokenizer_name: cohere/cohere
261
- max_sequence_length: 2047
262
- max_request_length: 2048
263
- client_spec:
264
- class_name: "helm.clients.cohere_client.CohereClient"
265
- window_service_spec:
266
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
267
-
268
- - name: cohere/xlarge-20221108
269
- model_name: cohere/xlarge-20221108
270
- tokenizer_name: cohere/cohere
271
- max_sequence_length: 2047
272
- max_request_length: 2048
273
- client_spec:
274
- class_name: "helm.clients.cohere_client.CohereClient"
275
- window_service_spec:
276
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
277
-
278
- - name: cohere/medium-20221108
279
- model_name: cohere/medium-20221108
280
- tokenizer_name: cohere/cohere
281
- max_sequence_length: 2047
282
- max_request_length: 2048
283
- client_spec:
284
- class_name: "helm.clients.cohere_client.CohereClient"
285
- window_service_spec:
286
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
287
-
288
- - name: cohere/command-medium-beta
289
- model_name: cohere/command-medium-beta
290
- tokenizer_name: cohere/cohere
291
- max_sequence_length: 2019
292
- max_request_length: 2020
293
- client_spec:
294
- class_name: "helm.clients.cohere_client.CohereClient"
295
- window_service_spec:
296
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
297
-
298
- - name: cohere/command-xlarge-beta
299
- model_name: cohere/command-xlarge-beta
300
- tokenizer_name: cohere/cohere
301
- max_sequence_length: 2019
302
- max_request_length: 2020
303
- client_spec:
304
- class_name: "helm.clients.cohere_client.CohereClient"
305
- window_service_spec:
306
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
307
-
308
208
  - name: cohere/command
309
209
  model_name: cohere/command
310
- tokenizer_name: cohere/cohere
210
+ tokenizer_name: cohere/command
311
211
  max_sequence_length: 2019 # TODO: verify this
312
212
  max_request_length: 2020 # TODO: verify this
313
213
  client_spec:
314
214
  class_name: "helm.clients.cohere_client.CohereClient"
315
- window_service_spec:
316
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
317
215
 
318
216
  - name: cohere/command-light
319
217
  model_name: cohere/command-light
320
- tokenizer_name: cohere/cohere
218
+ tokenizer_name: cohere/command-light
321
219
  max_sequence_length: 2019 # TODO: verify this
322
220
  max_request_length: 2020 # TODO: verify this
323
221
  client_spec:
324
222
  class_name: "helm.clients.cohere_client.CohereClient"
325
- window_service_spec:
326
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
223
+
224
+ - name: cohere/command-r
225
+ model_name: cohere/command-r
226
+ tokenizer_name: cohere/command-r
227
+ max_sequence_length: 128000
228
+ max_request_length: 128000
229
+ client_spec:
230
+ class_name: "helm.clients.cohere_client.CohereChatClient"
231
+
232
+ - name: cohere/command-r-plus
233
+ model_name: cohere/command-r-plus
234
+ tokenizer_name: cohere/command-r-plus
235
+ # "We have a known issue where prompts between 112K - 128K in length
236
+ # result in bad generations."
237
+ # Source: https://docs.cohere.com/docs/command-r-plus
238
+ max_sequence_length: 110000
239
+ max_request_length: 110000
240
+ client_spec:
241
+ class_name: "helm.clients.cohere_client.CohereChatClient"
327
242
 
328
243
  # Craiyon
329
244
 
@@ -352,7 +267,7 @@ model_deployments:
352
267
  tokenizer_name: databricks/dbrx-instruct
353
268
  max_sequence_length: 32767
354
269
  client_spec:
355
- class_name: "helm.clients.together_client.TogetherClient"
270
+ class_name: "helm.clients.together_client.TogetherChatClient"
356
271
 
357
272
  # DeepFloyd
358
273
 
@@ -390,7 +305,7 @@ model_deployments:
390
305
  tokenizer_name: deepseek-ai/deepseek-llm-67b-chat
391
306
  max_sequence_length: 4095
392
307
  client_spec:
393
- class_name: "helm.clients.together_client.TogetherClient"
308
+ class_name: "helm.clients.together_client.TogetherChatClient"
394
309
 
395
310
  # Gooseai
396
311
 
@@ -434,6 +349,14 @@ model_deployments:
434
349
  client_spec:
435
350
  class_name: "helm.clients.vertexai_client.VertexAIChatClient"
436
351
 
352
+ - name: google/gemini-1.0-pro-002
353
+ model_name: google/gemini-1.0-pro-002
354
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
355
+ max_sequence_length: 30720
356
+ max_sequence_and_generated_tokens_length: 32768 # Officially max_sequence_length + 2048
357
+ client_spec:
358
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
359
+
437
360
  - name: google/gemini-pro-vision
438
361
  model_name: google/gemini-pro-vision
439
362
  tokenizer_name: openai/cl100k_base
@@ -450,6 +373,22 @@ model_deployments:
450
373
  client_spec:
451
374
  class_name: "helm.clients.vertexai_client.VertexAIChatClient"
452
375
 
376
+ - name: google/gemini-1.5-flash-001
377
+ model_name: google/gemini-1.5-flash-001
378
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
379
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
380
+ # TODO: Max output tokens: 8192
381
+ client_spec:
382
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
383
+
384
+ - name: google/gemini-1.5-pro-001
385
+ model_name: google/gemini-1.5-pro-001
386
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
387
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
388
+ # TODO: Max output tokens: 8192
389
+ client_spec:
390
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
391
+
453
392
  - name: google/gemini-1.5-pro-preview-0409
454
393
  model_name: google/gemini-1.5-pro-preview-0409
455
394
  tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
@@ -458,6 +397,63 @@ model_deployments:
458
397
  client_spec:
459
398
  class_name: "helm.clients.vertexai_client.VertexAIChatClient"
460
399
 
400
+ - name: google/gemini-1.5-pro-preview-0514
401
+ model_name: google/gemini-1.5-pro-preview-0514
402
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
403
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
404
+ # TODO: Max output tokens: 8192
405
+ client_spec:
406
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
407
+
408
+ - name: google/gemini-1.5-flash-preview-0514
409
+ model_name: google/gemini-1.5-flash-preview-0514
410
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
411
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
412
+ # TODO: Max output tokens: 8192
413
+ client_spec:
414
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
415
+
416
+ ## Gemini with different safety settings
417
+ - name: google/gemini-1.5-pro-001-safety-default
418
+ model_name: google/gemini-1.5-pro-001-safety-default
419
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
420
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
421
+ # TODO: Max output tokens: 8192
422
+ client_spec:
423
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
424
+ args:
425
+ safety_settings_preset: default
426
+
427
+ - name: google/gemini-1.5-pro-001-safety-block-none
428
+ model_name: google/gemini-1.5-pro-001-safety-block-none
429
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
430
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
431
+ # TODO: Max output tokens: 8192
432
+ client_spec:
433
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
434
+ args:
435
+ safety_settings_preset: block_none
436
+
437
+ - name: google/gemini-1.5-flash-001-safety-default
438
+ model_name: google/gemini-1.5-flash-001-safety-default
439
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
440
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
441
+ # TODO: Max output tokens: 8192
442
+ client_spec:
443
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
444
+ args:
445
+ safety_settings_preset: default
446
+
447
+ - name: google/gemini-1.5-flash-001-safety-block-none
448
+ model_name: google/gemini-1.5-flash-001-safety-block-none
449
+ tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
450
+ max_sequence_length: 1000000 # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models
451
+ # TODO: Max output tokens: 8192
452
+ client_spec:
453
+ class_name: "helm.clients.vertexai_client.VertexAIChatClient"
454
+ args:
455
+ safety_settings_preset: block_none
456
+
461
457
  ## Gemma
462
458
  - name: together/gemma-2b
463
459
  model_name: google/gemma-2b
@@ -487,6 +483,56 @@ model_deployments:
487
483
  client_spec:
488
484
  class_name: "helm.clients.together_client.TogetherClient"
489
485
 
486
+ - name: together/gemma-2-9b-it
487
+ model_name: google/gemma-2-9b-it
488
+ tokenizer_name: google/gemma-2-9b
489
+ max_sequence_length: 8191
490
+ client_spec:
491
+ class_name: "helm.clients.together_client.TogetherClient"
492
+
493
+ - name: together/gemma-2-27b-it
494
+ model_name: google/gemma-2-27b-it
495
+ tokenizer_name: google/gemma-2-9b
496
+ max_sequence_length: 8191
497
+ client_spec:
498
+ class_name: "helm.clients.together_client.TogetherClient"
499
+
500
+ ## MedLM
501
+ - name: google/medlm-medium
502
+ model_name: google/medlm-medium
503
+ tokenizer_name: google/text-bison@001
504
+ max_sequence_length: 6000 # Officially 8192
505
+ max_sequence_and_generated_tokens_length: 7000 # Officially 9216
506
+ client_spec:
507
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
508
+ window_service_spec:
509
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
510
+
511
+ - name: google/medlm-large
512
+ model_name: google/medlm-large
513
+ tokenizer_name: google/text-bison@001
514
+ max_sequence_length: 6000 # Officially 8192
515
+ max_sequence_and_generated_tokens_length: 7000 # Officially 9216
516
+ client_spec:
517
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
518
+ window_service_spec:
519
+ class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
520
+
521
+ ## PaliGemma
522
+ - name: google/paligemma-3b-mix-224
523
+ model_name: google/paligemma-3b-mix-224
524
+ tokenizer_name: google/gemma-2b
525
+ max_sequence_length: 7167
526
+ client_spec:
527
+ class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient"
528
+
529
+ - name: google/paligemma-3b-mix-448
530
+ model_name: google/paligemma-3b-mix-448
531
+ tokenizer_name: google/gemma-2b
532
+ max_sequence_length: 7167
533
+ client_spec:
534
+ class_name: "helm.clients.vision_language.paligemma_client.PaliGemmaClient"
535
+
490
536
  ## PaLM 2
491
537
  - name: google/text-bison@001
492
538
  model_name: google/text-bison@001
@@ -504,7 +550,7 @@ model_deployments:
504
550
  max_sequence_length: 6000 # Officially 8192
505
551
  max_sequence_and_generated_tokens_length: 9216
506
552
  client_spec:
507
- class_name: "helm.proxy.clients.vertexai_client.VertexAITextClient"
553
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
508
554
  window_service_spec:
509
555
  class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
510
556
 
@@ -545,7 +591,7 @@ model_deployments:
545
591
  max_sequence_length: 6000 # Officially 6144
546
592
  max_sequence_and_generated_tokens_length: 7168
547
593
  client_spec:
548
- class_name: "helm.proxy.clients.vertexai_client.VertexAITextClient"
594
+ class_name: "helm.clients.vertexai_client.VertexAITextClient"
549
595
  window_service_spec:
550
596
  class_name: "helm.benchmark.window_services.no_decoding_window_service.NoDecodingWindowService"
551
597
 
@@ -561,6 +607,25 @@ model_deployments:
561
607
 
562
608
  # HuggingFace
563
609
 
610
+ ## AI Singapore
611
+ - name: huggingface/sea-lion-7b
612
+ model_name: aisingapore/sea-lion-7b
613
+ tokenizer_name: aisingapore/sea-lion-7b
614
+ max_sequence_length: 2048
615
+ client_spec:
616
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
617
+ args:
618
+ trust_remote_code: true
619
+
620
+ - name: huggingface/sea-lion-7b-instruct
621
+ model_name: aisingapore/sea-lion-7b-instruct
622
+ tokenizer_name: aisingapore/sea-lion-7b
623
+ max_sequence_length: 2048
624
+ client_spec:
625
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
626
+ args:
627
+ trust_remote_code: true
628
+
564
629
  ## Bigcode
565
630
  - name: huggingface/santacoder
566
631
  model_name: bigcode/santacoder
@@ -576,6 +641,15 @@ model_deployments:
576
641
  client_spec:
577
642
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
578
643
 
644
+ ## Biomistral
645
+
646
+ - name: huggingface/biomistral-7b
647
+ model_name: biomistral/biomistral-7b
648
+ tokenizer_name: mistralai/Mistral-7B-v0.1
649
+ max_sequence_length: 32000
650
+ client_spec:
651
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
652
+
579
653
  ## Databricks
580
654
  - name: huggingface/dolly-v2-3b
581
655
  model_name: databricks/dolly-v2-3b
@@ -643,6 +717,47 @@ model_deployments:
643
717
  client_spec:
644
718
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
645
719
 
720
+ ## Google
721
+ - name: huggingface/gemma-2-9b
722
+ model_name: google/gemma-2-9b
723
+ tokenizer_name: google/gemma-2-9b
724
+ max_sequence_length: 8192
725
+ client_spec:
726
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
727
+ args:
728
+ device_map: auto
729
+ torch_dtype: torch.bfloat16
730
+
731
+ - name: huggingface/gemma-2-9b-it
732
+ model_name: google/gemma-2-9b-it
733
+ tokenizer_name: google/gemma-2-9b
734
+ max_sequence_length: 8192
735
+ client_spec:
736
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
737
+ args:
738
+ device_map: auto
739
+ torch_dtype: torch.bfloat16
740
+
741
+ - name: huggingface/gemma-2-27b
742
+ model_name: google/gemma-2-27b
743
+ tokenizer_name: google/gemma-2-9b
744
+ max_sequence_length: 8192
745
+ client_spec:
746
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
747
+ args:
748
+ device_map: auto
749
+ torch_dtype: torch.bfloat16
750
+
751
+ - name: huggingface/gemma-2-27b-it
752
+ model_name: google/gemma-2-27b-it
753
+ tokenizer_name: google/gemma-2-9b
754
+ max_sequence_length: 8192
755
+ client_spec:
756
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
757
+ args:
758
+ device_map: auto
759
+ torch_dtype: torch.bfloat16
760
+
646
761
  ## LMSYS
647
762
  - name: huggingface/vicuna-7b-v1.3
648
763
  model_name: lmsys/vicuna-7b-v1.3
@@ -658,6 +773,15 @@ model_deployments:
658
773
  client_spec:
659
774
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
660
775
 
776
+ ## Meditron
777
+
778
+ - name: huggingface/meditron-7b
779
+ model_name: epfl-llm/meditron-7b
780
+ tokenizer_name: meta-llama/Llama-2-7b-hf
781
+ max_sequence_length: 4094
782
+ client_spec:
783
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
784
+
661
785
  ## Meta
662
786
  - name: huggingface/opt-175b
663
787
  model_name: meta/opt-175b
@@ -738,6 +862,14 @@ model_deployments:
738
862
  client_spec:
739
863
  class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
740
864
 
865
+ ## KAIST AI
866
+ - name: huggingface/prometheus-vision-13b-v1.0-hf
867
+ model_name: kaistai/prometheus-vision-13b-v1.0-hf
868
+ tokenizer_name: hf-internal-testing/llama-tokenizer
869
+ max_sequence_length: 2048
870
+ client_spec:
871
+ class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
872
+
741
873
  ## OpenFlamingo
742
874
  - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
743
875
  model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
@@ -750,6 +882,7 @@ model_deployments:
750
882
  tokenizer_name: "anas-awadalla-2/mpt-7b"
751
883
  cross_attn_every_n_layers: 4
752
884
 
885
+ ## Microsoft
753
886
  - name: together/phi-2
754
887
  model_name: microsoft/phi-2
755
888
  tokenizer_name: microsoft/phi-2
@@ -757,6 +890,26 @@ model_deployments:
757
890
  client_spec:
758
891
  class_name: "helm.clients.together_client.TogetherClient"
759
892
 
893
+ - name: huggingface/phi-3-small-8k-instruct
894
+ model_name: microsoft/phi-3-small-8k-instruct
895
+ tokenizer_name: microsoft/phi-3-small-8k-instruct
896
+ max_sequence_length: 8192
897
+ client_spec:
898
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
899
+ args:
900
+ torch_dtype: auto
901
+ trust_remote_code: true
902
+
903
+ - name: huggingface/phi-3-medium-4k-instruct
904
+ model_name: microsoft/phi-3-medium-4k-instruct
905
+ tokenizer_name: microsoft/phi-3-medium-4k-instruct
906
+ max_sequence_length: 4096
907
+ client_spec:
908
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
909
+ args:
910
+ device_map: auto
911
+ torch_dtype: auto
912
+
760
913
  ## Mistral AI
761
914
  - name: huggingface/bakLlava-v1-hf
762
915
  model_name: mistralai/bakLlava-v1-hf
@@ -811,6 +964,179 @@ model_deployments:
811
964
  args:
812
965
  pretrained_model_name_or_path: openai-community/gpt2
813
966
 
967
+ ## OpenThaiGPT
968
+ - name: huggingface/openthaigpt-1.0.0-7b-chat
969
+ model_name: openthaigpt/openthaigpt-1.0.0-7b-chat
970
+ tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
971
+ max_sequence_length: 4096
972
+ client_spec:
973
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
974
+
975
+ - name: huggingface/openthaigpt-1.0.0-13b-chat
976
+ model_name: openthaigpt/openthaigpt-1.0.0-13b-chat
977
+ tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
978
+ max_sequence_length: 4096
979
+ client_spec:
980
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
981
+ args:
982
+ device_map: auto
983
+
984
+ - name: huggingface/openthaigpt-1.0.0-70b-chat
985
+ model_name: openthaigpt/openthaigpt-1.0.0-70b-chat
986
+ tokenizer_name: huggingface/openthaigpt-1.0.0-7b-chat
987
+ max_sequence_length: 4096
988
+ client_spec:
989
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
990
+ args:
991
+ device_map: auto
992
+
993
+ ## SAIL (SEA AI Lab)
994
+ - name: huggingface/sailor-7b
995
+ model_name: sail/sailor-7b
996
+ tokenizer_name: qwen/qwen1.5-7b
997
+ max_sequence_length: 32768
998
+ client_spec:
999
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1000
+
1001
+ - name: huggingface/sailor-7b-chat
1002
+ model_name: sail/sailor-7b-chat
1003
+ tokenizer_name: qwen/qwen1.5-7b
1004
+ max_sequence_length: 32768
1005
+ client_spec:
1006
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1007
+
1008
+ - name: huggingface/sailor-14b
1009
+ model_name: sail/sailor-14b
1010
+ tokenizer_name: qwen/qwen1.5-7b
1011
+ max_sequence_length: 32768
1012
+ client_spec:
1013
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1014
+ args:
1015
+ device_map: auto
1016
+
1017
+ - name: huggingface/sailor-14b-chat
1018
+ model_name: sail/sailor-14b-chat
1019
+ tokenizer_name: qwen/qwen1.5-7b
1020
+ max_sequence_length: 32768
1021
+ client_spec:
1022
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1023
+ args:
1024
+ device_map: auto
1025
+
1026
+ ## SambaNova
1027
+ - name: huggingface/sambalingo-thai-base
1028
+ model_name: sambanova/sambalingo-thai-base
1029
+ tokenizer_name: sambanova/sambalingo-thai-base
1030
+ max_sequence_length: 4096
1031
+ client_spec:
1032
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1033
+ args:
1034
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
1035
+
1036
+ - name: huggingface/sambalingo-thai-chat
1037
+ model_name: sambanova/sambalingo-thai-chat
1038
+ tokenizer_name: sambanova/sambalingo-thai-base
1039
+ max_sequence_length: 4096
1040
+ client_spec:
1041
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1042
+ args:
1043
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
1044
+
1045
+ - name: huggingface/sambalingo-thai-base-70b
1046
+ model_name: sambanova/sambalingo-thai-base-70b
1047
+ tokenizer_name: sambanova/sambalingo-thai-base
1048
+ max_sequence_length: 4096
1049
+ client_spec:
1050
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1051
+ args:
1052
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
1053
+ device_map: auto
1054
+
1055
+ - name: huggingface/sambalingo-thai-chat-70b
1056
+ model_name: sambanova/sambalingo-thai-chat-70b
1057
+ tokenizer_name: sambanova/sambalingo-thai-base
1058
+ max_sequence_length: 4096
1059
+ client_spec:
1060
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1061
+ args:
1062
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
1063
+ device_map: auto
1064
+
1065
+ ## SCB10X
1066
+ - name: huggingface/typhoon-7b
1067
+ model_name: scb10x/typhoon-7b
1068
+ tokenizer_name: scb10x/typhoon-7b
1069
+ max_sequence_length: 4096
1070
+ client_spec:
1071
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1072
+
1073
+ - name: huggingface/typhoon-v1.5-8b
1074
+ model_name: scb10x/typhoon-v1.5-8b
1075
+ tokenizer_name: meta/llama-3-8b
1076
+ max_sequence_length: 8192
1077
+ client_spec:
1078
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1079
+
1080
+ - name: huggingface/typhoon-v1.5-8b-instruct
1081
+ model_name: scb10x/typhoon-v1.5-8b-instruct
1082
+ tokenizer_name: meta/llama-3-8b
1083
+ max_sequence_length: 8192
1084
+ client_spec:
1085
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1086
+
1087
+ - name: huggingface/typhoon-v1.5-72b
1088
+ model_name: scb10x/typhoon-v1.5-72b
1089
+ tokenizer_name: qwen/qwen1.5-7b
1090
+ max_sequence_length: 32768
1091
+ client_spec:
1092
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1093
+ args:
1094
+ device_map: auto
1095
+
1096
+ - name: huggingface/typhoon-v1.5-72b-instruct
1097
+ model_name: scb10x/typhoon-v1.5-72b-instruct
1098
+ tokenizer_name: qwen/qwen1.5-7b
1099
+ max_sequence_length: 32768
1100
+ client_spec:
1101
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1102
+ args:
1103
+ device_map: auto
1104
+
1105
+ - name: huggingface/llama-3-typhoon-v1.5x-8b-instruct
1106
+ model_name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
1107
+ tokenizer_name: meta/llama-3-8b
1108
+ max_sequence_length: 8192
1109
+ client_spec:
1110
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1111
+
1112
+ - name: huggingface/llama-3-typhoon-v1.5x-70b-instruct
1113
+ model_name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
1114
+ tokenizer_name: meta/llama-3-8b
1115
+ max_sequence_length: 8192
1116
+ client_spec:
1117
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1118
+ args:
1119
+ device_map: auto
1120
+
1121
+ # Alibaba DAMO Academy
1122
+ - name: huggingface/seallm-7b-v2
1123
+ model_name: damo/seallm-7b-v2
1124
+ tokenizer_name: damo/seallm-7b-v2
1125
+ max_sequence_length: 4096
1126
+ client_spec:
1127
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1128
+ args:
1129
+ pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2
1130
+
1131
+ - name: huggingface/seallm-7b-v2.5
1132
+ model_name: damo/seallm-7b-v2.5
1133
+ tokenizer_name: damo/seallm-7b-v2.5
1134
+ max_sequence_length: 4096
1135
+ client_spec:
1136
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1137
+ args:
1138
+ pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5
1139
+
814
1140
  ## StabilityAI
815
1141
  - name: huggingface/stablelm-base-alpha-3b
816
1142
  model_name: stabilityai/stablelm-base-alpha-3b
@@ -1090,134 +1416,62 @@ model_deployments:
1090
1416
  client_spec:
1091
1417
  class_name: "helm.clients.mistral_client.MistralAIClient"
1092
1418
 
1093
- # Neurips
1094
- - name: neurips/local
1095
- model_name: neurips/local
1096
- tokenizer_name: neurips/local
1097
- max_sequence_length: 2048
1098
- client_spec:
1099
- class_name: "helm.clients.http_model_client.HTTPModelClient"
1100
-
1101
- # Nvidia
1102
- - name: nvidia/megatron-gpt2
1103
- model_name: nvidia/megatron-gpt2
1104
- tokenizer_name: huggingface/gpt2
1105
- max_sequence_length: 1024
1106
- client_spec:
1107
- class_name: "helm.clients.megatron_client.MegatronClient"
1108
-
1109
- # OpenAI
1110
-
1111
- ## GPT 3 Models
1112
-
1113
- - name: openai/davinci-002
1114
- model_name: openai/davinci-002
1115
- tokenizer_name: openai/cl100k_base
1116
- # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1117
- # to provide a margin of error.
1118
- max_sequence_length: 16000
1119
- client_spec:
1120
- class_name: "helm.clients.openai_client.OpenAIClient"
1121
-
1122
- - name: openai/babbage-002
1123
- model_name: openai/babbage-002
1124
- tokenizer_name: openai/cl100k_base
1125
- # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1126
- # to provide a margin of error.
1127
- max_sequence_length: 16000
1128
- client_spec:
1129
- class_name: "helm.clients.openai_client.OpenAIClient"
1130
-
1131
- # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
1132
- # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
1133
-
1134
- - name: openai/davinci
1135
- deprecated: true
1136
- model_name: openai/davinci
1137
- tokenizer_name: huggingface/gpt2
1138
- max_sequence_length: 2048
1139
- max_request_length: 2049
1140
- client_spec:
1141
- class_name: "helm.clients.openai_client.OpenAIClient"
1142
-
1143
- - name: openai/curie
1144
- deprecated: true
1145
- model_name: openai/curie
1146
- tokenizer_name: huggingface/gpt2
1147
- max_sequence_length: 2048
1148
- max_request_length: 2049
1419
+ - name: mistralai/mistral-large-2407
1420
+ model_name: mistralai/mistral-large-2407
1421
+ tokenizer_name: mistralai/Mistral-Large-Instruct-2407
1422
+ max_sequence_length: 128000
1149
1423
  client_spec:
1150
- class_name: "helm.clients.openai_client.OpenAIClient"
1424
+ class_name: "helm.clients.mistral_client.MistralAIClient"
1151
1425
 
1152
- - name: openai/babbage
1153
- deprecated: true
1154
- model_name: openai/babbage
1155
- tokenizer_name: huggingface/gpt2
1156
- max_sequence_length: 2048
1157
- max_request_length: 2049
1426
+ - name: mistralai/open-mistral-nemo-2407
1427
+ model_name: mistralai/open-mistral-nemo-2407
1428
+ tokenizer_name: mistralai/Mistral-Nemo-Base-2407
1429
+ max_sequence_length: 128000
1158
1430
  client_spec:
1159
- class_name: "helm.clients.openai_client.OpenAIClient"
1431
+ class_name: "helm.clients.mistral_client.MistralAIClient"
1160
1432
 
1161
- - name: openai/ada
1162
- deprecated: true
1163
- model_name: openai/ada
1164
- tokenizer_name: huggingface/gpt2
1433
+ # Neurips
1434
+ - name: neurips/local
1435
+ model_name: neurips/local
1436
+ tokenizer_name: neurips/local
1165
1437
  max_sequence_length: 2048
1166
- max_request_length: 2049
1167
1438
  client_spec:
1168
- class_name: "helm.clients.openai_client.OpenAIClient"
1439
+ class_name: "helm.clients.http_model_client.HTTPModelClient"
1169
1440
 
1170
- - name: openai/text-davinci-003
1171
- deprecated: true
1172
- model_name: openai/text-davinci-003
1441
+ # Nvidia
1442
+ - name: nvidia/megatron-gpt2
1443
+ model_name: nvidia/megatron-gpt2
1173
1444
  tokenizer_name: huggingface/gpt2
1174
- max_sequence_length: 4000
1175
- max_request_length: 4001
1445
+ max_sequence_length: 1024
1176
1446
  client_spec:
1177
- class_name: "helm.clients.openai_client.OpenAIClient"
1447
+ class_name: "helm.clients.megatron_client.MegatronClient"
1178
1448
 
1179
- - name: openai/text-davinci-002
1180
- deprecated: true
1181
- model_name: openai/text-davinci-002
1182
- tokenizer_name: huggingface/gpt2
1183
- max_sequence_length: 4000
1184
- max_request_length: 4001
1449
+ - name: nvidia/nemotron-4-340b-instruct
1450
+ model_name: nvidia/nemotron-4-340b-instruct
1451
+ tokenizer_name: nvidia/nemotron-4-340b-instruct
1452
+ max_sequence_length: 4085
1185
1453
  client_spec:
1186
- class_name: "helm.clients.openai_client.OpenAIClient"
1454
+ class_name: "helm.clients.nvidia_nim_client.NvidiaNimClient"
1187
1455
 
1188
- - name: openai/text-davinci-001
1189
- deprecated: true
1190
- model_name: openai/text-davinci-001
1191
- tokenizer_name: huggingface/gpt2
1192
- max_sequence_length: 2048
1193
- max_request_length: 2049
1194
- client_spec:
1195
- class_name: "helm.clients.openai_client.OpenAIClient"
1456
+ # OpenAI
1196
1457
 
1197
- - name: openai/text-curie-001
1198
- deprecated: true
1199
- model_name: openai/text-curie-001
1200
- tokenizer_name: huggingface/gpt2
1201
- max_sequence_length: 2048
1202
- max_request_length: 2049
1203
- client_spec:
1204
- class_name: "helm.clients.openai_client.OpenAIClient"
1458
+ ## GPT 3 Models
1205
1459
 
1206
- - name: openai/text-babbage-001
1207
- deprecated: true
1208
- model_name: openai/text-babbage-001
1209
- tokenizer_name: huggingface/gpt2
1210
- max_sequence_length: 2048
1211
- max_request_length: 2049
1460
+ - name: openai/davinci-002
1461
+ model_name: openai/davinci-002
1462
+ tokenizer_name: openai/cl100k_base
1463
+ # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1464
+ # to provide a margin of error.
1465
+ max_sequence_length: 16000
1212
1466
  client_spec:
1213
1467
  class_name: "helm.clients.openai_client.OpenAIClient"
1214
1468
 
1215
- - name: openai/text-ada-001
1216
- deprecated: true
1217
- model_name: openai/text-ada-001
1218
- tokenizer_name: huggingface/gpt2
1219
- max_sequence_length: 2048
1220
- max_request_length: 2049
1469
+ - name: openai/babbage-002
1470
+ model_name: openai/babbage-002
1471
+ tokenizer_name: openai/cl100k_base
1472
+ # Claimed sequence length is 16,384 tokens but we round down to 16,000 tokens
1473
+ # to provide a margin of error.
1474
+ max_sequence_length: 16000
1221
1475
  client_spec:
1222
1476
  class_name: "helm.clients.openai_client.OpenAIClient"
1223
1477
 
@@ -1348,6 +1602,27 @@ model_deployments:
1348
1602
  client_spec:
1349
1603
  class_name: "helm.clients.openai_client.OpenAIClient"
1350
1604
 
1605
+ - name: openai/gpt-4o-2024-05-13
1606
+ model_name: openai/gpt-4o-2024-05-13
1607
+ tokenizer_name: openai/o200k_base
1608
+ max_sequence_length: 128000
1609
+ client_spec:
1610
+ class_name: "helm.clients.openai_client.OpenAIClient"
1611
+
1612
+ - name: openai/gpt-4o-2024-08-06
1613
+ model_name: openai/gpt-4o-2024-08-06
1614
+ tokenizer_name: openai/o200k_base
1615
+ max_sequence_length: 128000
1616
+ client_spec:
1617
+ class_name: "helm.clients.openai_client.OpenAIClient"
1618
+
1619
+ - name: openai/gpt-4o-mini-2024-07-18
1620
+ model_name: openai/gpt-4o-mini-2024-07-18
1621
+ tokenizer_name: openai/o200k_base
1622
+ max_sequence_length: 128000
1623
+ client_spec:
1624
+ class_name: "helm.clients.openai_client.OpenAIClient"
1625
+
1351
1626
  - name: openai/gpt-4-vision-preview
1352
1627
  model_name: openai/gpt-4-vision-preview
1353
1628
  tokenizer_name: openai/cl100k_base
@@ -1366,78 +1641,10 @@ model_deployments:
1366
1641
  client_spec:
1367
1642
  class_name: "helm.clients.openai_client.OpenAIClient"
1368
1643
 
1369
- ## Codex Models
1370
- # DEPRECATED: Codex models have been shut down on March 23 2023.
1371
-
1372
- - name: openai/code-davinci-002
1373
- deprecated: true
1374
- model_name: openai/code-davinci-002
1375
- tokenizer_name: huggingface/gpt2
1376
- max_sequence_length: 4000
1377
- max_request_length: 4001
1378
- client_spec:
1379
- class_name: "helm.clients.openai_client.OpenAIClient"
1380
-
1381
- - name: openai/code-davinci-001
1382
- deprecated: true
1383
- model_name: openai/code-davinci-001
1384
- tokenizer_name: huggingface/gpt2
1385
- max_sequence_length: 2048
1386
- max_request_length: 2049
1387
- client_spec:
1388
- class_name: "helm.clients.openai_client.OpenAIClient"
1389
-
1390
- - name: openai/code-cushman-001
1391
- deprecated: true
1392
- model_name: openai/code-cushman-001
1393
- tokenizer_name: huggingface/gpt2
1394
- max_sequence_length: 2048
1395
- max_request_length: 2049
1396
- client_spec:
1397
- class_name: "helm.clients.openai_client.OpenAIClient"
1398
-
1399
1644
  ## Text Similarity Models
1400
1645
  # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
1401
1646
  # The number of parameters is guessed based on the number of parameters of the
1402
1647
  # corresponding GPT-3 model.
1403
- # DEPRECATED: Announced on July 06 2023 that first generation embeddings models
1404
- # will be shut down on January 04 2024.
1405
-
1406
- - name: openai/text-similarity-davinci-001
1407
- deprecated: true
1408
- model_name: openai/text-similarity-davinci-001
1409
- tokenizer_name: huggingface/gpt2
1410
- max_sequence_length: 2048
1411
- max_request_length: 2049
1412
- client_spec:
1413
- class_name: "helm.clients.openai_client.OpenAIClient"
1414
-
1415
- - name: openai/text-similarity-curie-001
1416
- deprecated: true
1417
- model_name: openai/text-similarity-curie-001
1418
- tokenizer_name: huggingface/gpt2
1419
- max_sequence_length: 2048
1420
- max_request_length: 2049
1421
- client_spec:
1422
- class_name: "helm.clients.openai_client.OpenAIClient"
1423
-
1424
- - name: openai/text-similarity-babbage-001
1425
- deprecated: true
1426
- model_name: openai/text-similarity-babbage-001
1427
- tokenizer_name: huggingface/gpt2
1428
- max_sequence_length: 2048
1429
- max_request_length: 2049
1430
- client_spec:
1431
- class_name: "helm.clients.openai_client.OpenAIClient"
1432
-
1433
- - name: openai/text-similarity-ada-001
1434
- deprecated: true
1435
- model_name: openai/text-similarity-ada-001
1436
- tokenizer_name: huggingface/gpt2
1437
- max_sequence_length: 2048
1438
- max_request_length: 2049
1439
- client_spec:
1440
- class_name: "helm.clients.openai_client.OpenAIClient"
1441
1648
 
1442
1649
  # As of 2023-11-07, text-embedding-ada-002 is not deprecated:
1443
1650
  # "We recommend using text-embedding-ada-002 for nearly all use cases."
@@ -1635,23 +1842,77 @@ model_deployments:
1635
1842
  args:
1636
1843
  together_model: meta-llama/Meta-Llama-3-70B
1637
1844
 
1845
+ - name: together/llama-3.1-8b-instruct-turbo
1846
+ model_name: meta/llama-3.1-8b-instruct-turbo
1847
+ tokenizer_name: meta/llama-3.1-8b
1848
+ max_sequence_length: 128000
1849
+ client_spec:
1850
+ class_name: "helm.clients.together_client.TogetherChatClient"
1851
+ args:
1852
+ together_model: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
1853
+
1854
+ - name: together/llama-3.1-70b-instruct-turbo
1855
+ model_name: meta/llama-3.1-70b-instruct-turbo
1856
+ tokenizer_name: meta/llama-3.1-8b
1857
+ max_sequence_length: 128000
1858
+ client_spec:
1859
+ class_name: "helm.clients.together_client.TogetherChatClient"
1860
+ args:
1861
+ together_model: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
1862
+
1863
+ - name: together/llama-3.1-405b-instruct-turbo
1864
+ model_name: meta/llama-3.1-405b-instruct-turbo
1865
+ tokenizer_name: meta/llama-3.1-8b
1866
+ max_sequence_length: 128000
1867
+ client_spec:
1868
+ class_name: "helm.clients.together_client.TogetherChatClient"
1869
+ args:
1870
+ together_model: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
1871
+
1638
1872
  - name: together/llama-3-8b-chat
1639
1873
  model_name: meta/llama-3-8b-chat
1640
1874
  tokenizer_name: meta/llama-3-8b
1641
- max_sequence_length: 8191
1875
+ max_sequence_length: 8182
1642
1876
  client_spec:
1643
- class_name: "helm.clients.together_client.TogetherClient"
1877
+ class_name: "helm.clients.together_client.TogetherChatClient"
1644
1878
  args:
1645
- together_model: meta-llama/Meta-Llama-3-8B
1879
+ together_model: meta-llama/Llama-3-8b-chat-hf
1646
1880
 
1647
1881
  - name: together/llama-3-70b-chat
1648
1882
  model_name: meta/llama-3-70b-chat
1649
1883
  tokenizer_name: meta/llama-3-8b
1650
- max_sequence_length: 8191
1884
+ max_sequence_length: 8182
1885
+ client_spec:
1886
+ class_name: "helm.clients.together_client.TogetherChatClient"
1887
+ args:
1888
+ together_model: meta-llama/Llama-3-70b-chat-hf
1889
+
1890
+ - name: together/llama-guard-7b
1891
+ model_name: meta/llama-guard-7b
1892
+ tokenizer_name: meta-llama/Llama-2-7b-hf
1893
+ max_sequence_length: 2047
1651
1894
  client_spec:
1652
1895
  class_name: "helm.clients.together_client.TogetherClient"
1653
1896
  args:
1654
- together_model: meta-llama/Meta-Llama-3-70B
1897
+ together_model: meta-llama/llama-guard-7b
1898
+
1899
+ - name: together/llama-guard-2-8b
1900
+ model_name: meta/llama-guard-2-8b
1901
+ tokenizer_name: meta/llama-3-8b
1902
+ max_sequence_length: 4094
1903
+ client_spec:
1904
+ class_name: "helm.clients.together_client.TogetherClient"
1905
+ args:
1906
+ together_model: meta-llama/llamaguard-2-8b
1907
+
1908
+ - name: together/llama-guard-3-8b
1909
+ model_name: meta/llama-guard-3-8b
1910
+ tokenizer_name: meta/llama-3.1-8b
1911
+ max_sequence_length: 128000
1912
+ client_spec:
1913
+ class_name: "helm.clients.together_client.TogetherClient"
1914
+ args:
1915
+ together_model: meta-llama/Meta-Llama-Guard-3-8B
1655
1916
 
1656
1917
  # 01.AI
1657
1918
  - name: together/yi-6b
@@ -1677,18 +1938,32 @@ model_deployments:
1677
1938
  tokenizer_name: 01-ai/Yi-6B
1678
1939
  max_sequence_length: 4095
1679
1940
  client_spec:
1680
- class_name: "helm.clients.together_client.TogetherClient"
1941
+ class_name: "helm.clients.together_client.TogetherChatClient"
1681
1942
  args:
1682
- together_model: zero-one-ai/Yi-6B
1943
+ together_model: zero-one-ai/Yi-6B-Chat
1683
1944
 
1684
1945
  - name: together/yi-34b-chat
1685
1946
  model_name: 01-ai/yi-34b-chat
1686
1947
  tokenizer_name: 01-ai/Yi-6B
1687
1948
  max_sequence_length: 4095
1688
1949
  client_spec:
1689
- class_name: "helm.clients.together_client.TogetherClient"
1950
+ class_name: "helm.clients.together_client.TogetherChatClient"
1690
1951
  args:
1691
- together_model: zero-one-ai/Yi-34B
1952
+ together_model: zero-one-ai/Yi-34B-Chat
1953
+
1954
+ - name: 01-ai/yi-large
1955
+ model_name: 01-ai/yi-large
1956
+ tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute
1957
+ max_sequence_length: 16000
1958
+ client_spec:
1959
+ class_name: "helm.clients.yi_client.YiChatClient"
1960
+
1961
+ - name: 01-ai/yi-large-preview
1962
+ model_name: 01-ai/yi-large-preview
1963
+ tokenizer_name: 01-ai/Yi-6B # Actual tokenizer is publicly unavailable, so use a substitute
1964
+ max_sequence_length: 16000
1965
+ client_spec:
1966
+ class_name: "helm.clients.yi_client.YiChatClient"
1692
1967
 
1693
1968
 
1694
1969
  # Allen Institute for AI
@@ -1711,8 +1986,16 @@ model_deployments:
1711
1986
  tokenizer_name: allenai/olmo-7b
1712
1987
  max_sequence_length: 2047
1713
1988
  client_spec:
1714
- class_name: "helm.clients.together_client.TogetherClient"
1989
+ class_name: "helm.clients.together_client.TogetherChatClient"
1715
1990
 
1991
+ - name: huggingface/olmo-1.7-7b
1992
+ model_name: allenai/olmo-1.7-7b
1993
+ tokenizer_name: allenai/OLMo-1.7-7B-hf
1994
+ max_sequence_length: 2048
1995
+ client_spec:
1996
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1997
+ args:
1998
+ pretrained_model_name_or_path: allenai/OLMo-1.7-7B-hf
1716
1999
 
1717
2000
  ## MistralAI
1718
2001
  - name: together/mistral-7b-v0.1
@@ -1724,6 +2007,27 @@ model_deployments:
1724
2007
  args:
1725
2008
  together_model: mistralai/Mistral-7B-v0.1
1726
2009
 
2010
+ - name: together/mistral-7b-instruct-v0.1
2011
+ model_name: mistralai/mistral-7b-instruct-v0.1
2012
+ tokenizer_name: mistralai/Mistral-7B-Instruct-v0.1
2013
+ max_sequence_length: 4000
2014
+ client_spec:
2015
+ class_name: "helm.clients.together_client.TogetherChatClient"
2016
+
2017
+ - name: together/mistral-7b-instruct-v0.2
2018
+ model_name: mistralai/mistral-7b-instruct-v0.2
2019
+ tokenizer_name: mistralai/Mistral-7B-Instruct-v0.2
2020
+ max_sequence_length: 32000
2021
+ client_spec:
2022
+ class_name: "helm.clients.together_client.TogetherChatClient"
2023
+
2024
+ - name: together/mistral-7b-instruct-v0.3
2025
+ model_name: mistralai/mistral-7b-instruct-v0.3
2026
+ tokenizer_name: mistralai/Mistral-7B-Instruct-v0.3
2027
+ max_sequence_length: 32000
2028
+ client_spec:
2029
+ class_name: "helm.clients.together_client.TogetherChatClient"
2030
+
1727
2031
  - name: together/mixtral-8x7b-32kseqlen
1728
2032
  model_name: mistralai/mixtral-8x7b-32kseqlen
1729
2033
  tokenizer_name: mistralai/Mistral-7B-v0.1
@@ -1736,9 +2040,9 @@ model_deployments:
1736
2040
  - name: together/mixtral-8x7b-instruct-v0.1
1737
2041
  model_name: mistralai/mixtral-8x7b-instruct-v0.1
1738
2042
  tokenizer_name: mistralai/Mistral-7B-v0.1
1739
- max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
2043
+ max_sequence_length: 32767
1740
2044
  client_spec:
1741
- class_name: "helm.clients.together_client.TogetherClient"
2045
+ class_name: "helm.clients.together_client.TogetherChatClient"
1742
2046
 
1743
2047
  - name: together/mixtral-8x22b
1744
2048
  model_name: mistralai/mixtral-8x22b
@@ -1752,7 +2056,7 @@ model_deployments:
1752
2056
  tokenizer_name: mistralai/Mistral-7B-v0.1
1753
2057
  max_sequence_length: 65535
1754
2058
  client_spec:
1755
- class_name: "helm.clients.together_client.TogetherClient"
2059
+ class_name: "helm.clients.together_client.TogetherChatClient"
1756
2060
 
1757
2061
 
1758
2062
  ## Snowflake
@@ -1872,18 +2176,6 @@ model_deployments:
1872
2176
  args:
1873
2177
  together_model: togethercomputer/RedPajama-INCITE-7B-Instruct
1874
2178
 
1875
- ## Tsinghua
1876
- - name: together/glm
1877
- deprecated: true # Removed from Together
1878
- model_name: tsinghua/glm
1879
- tokenizer_name: TsinghuaKEG/ice
1880
- max_sequence_length: 2048
1881
- max_request_length: 2049
1882
- client_spec:
1883
- class_name: "helm.clients.together_client.TogetherClient"
1884
- window_service_spec:
1885
- class_name: "helm.benchmark.window_services.ice_window_service.ICEWindowService"
1886
-
1887
2179
  - name: thudm/cogview2
1888
2180
  model_name: thudm/cogview2
1889
2181
  tokenizer_name: openai/clip-vit-large-patch14
@@ -1922,22 +2214,6 @@ model_deployments:
1922
2214
  client_spec:
1923
2215
  class_name: "helm.clients.palmyra_client.PalmyraClient"
1924
2216
 
1925
- - name: writer/palmyra-instruct-30
1926
- model_name: writer/palmyra-instruct-30
1927
- tokenizer_name: writer/gpt2
1928
- max_sequence_length: 2048
1929
- max_sequence_and_generated_tokens_length: 2048
1930
- client_spec:
1931
- class_name: "helm.clients.palmyra_client.PalmyraClient"
1932
-
1933
- - name: writer/palmyra-e
1934
- model_name: writer/palmyra-e
1935
- tokenizer_name: writer/gpt2
1936
- max_sequence_length: 2048
1937
- max_sequence_and_generated_tokens_length: 2048
1938
- client_spec:
1939
- class_name: "helm.clients.palmyra_client.PalmyraClient"
1940
-
1941
2217
  - name: writer/silk-road
1942
2218
  model_name: writer/silk-road
1943
2219
  tokenizer_name: writer/gpt2
@@ -1978,12 +2254,21 @@ model_deployments:
1978
2254
  client_spec:
1979
2255
  class_name: "helm.clients.palmyra_client.PalmyraClient"
1980
2256
 
2257
+ - name: writer/palmyra-vision-003
2258
+ model_name: writer/palmyra-vision-003
2259
+ tokenizer_name: writer/gpt2
2260
+ max_sequence_length: 2048
2261
+ max_sequence_and_generated_tokens_length: 2048
2262
+ client_spec:
2263
+ class_name: "helm.clients.vision_language.palmyra_vision_client.PalmyraVisionClient"
2264
+
2265
+
1981
2266
  # Qwen
1982
2267
 
1983
2268
  - name: together/qwen-7b
1984
2269
  model_name: qwen/qwen-7b
1985
2270
  tokenizer_name: qwen/qwen-7b
1986
- max_sequence_length: 8191
2271
+ max_sequence_length: 32767
1987
2272
  client_spec:
1988
2273
  class_name: "helm.clients.together_client.TogetherClient"
1989
2274
  args:
@@ -2019,12 +2304,54 @@ model_deployments:
2019
2304
  - name: together/qwen1.5-72b
2020
2305
  model_name: qwen/qwen1.5-72b
2021
2306
  tokenizer_name: qwen/qwen1.5-7b
2022
- max_sequence_length: 4095
2307
+ max_sequence_length: 32767
2023
2308
  client_spec:
2024
2309
  class_name: "helm.clients.together_client.TogetherClient"
2025
2310
  args:
2026
2311
  together_model: Qwen/Qwen1.5-72B
2027
2312
 
2313
+ - name: together/qwen1.5-7b-chat
2314
+ model_name: qwen/qwen1.5-7b-chat
2315
+ tokenizer_name: qwen/qwen1.5-7b
2316
+ max_sequence_length: 32767
2317
+ client_spec:
2318
+ class_name: "helm.clients.together_client.TogetherChatClient"
2319
+
2320
+ - name: together/qwen1.5-14b-chat
2321
+ model_name: qwen/qwen1.5-14b-chat
2322
+ tokenizer_name: qwen/qwen1.5-7b
2323
+ max_sequence_length: 32767
2324
+ client_spec:
2325
+ class_name: "helm.clients.together_client.TogetherChatClient"
2326
+
2327
+ - name: together/qwen1.5-32b-chat
2328
+ model_name: qwen/qwen1.5-32b-chat
2329
+ tokenizer_name: qwen/qwen1.5-7b
2330
+ max_sequence_length: 32767
2331
+ client_spec:
2332
+ class_name: "helm.clients.together_client.TogetherChatClient"
2333
+
2334
+ - name: together/qwen1.5-72b-chat
2335
+ model_name: qwen/qwen1.5-72b-chat
2336
+ tokenizer_name: qwen/qwen1.5-7b
2337
+ max_sequence_length: 32767
2338
+ client_spec:
2339
+ class_name: "helm.clients.together_client.TogetherChatClient"
2340
+
2341
+ - name: together/qwen1.5-110b-chat
2342
+ model_name: qwen/qwen1.5-110b-chat
2343
+ tokenizer_name: qwen/qwen1.5-7b
2344
+ max_sequence_length: 32767
2345
+ client_spec:
2346
+ class_name: "helm.clients.together_client.TogetherChatClient"
2347
+
2348
+ - name: together/qwen2-72b-instruct
2349
+ model_name: qwen/qwen2-72b-instruct
2350
+ tokenizer_name: qwen/qwen2-72b-instruct
2351
+ max_sequence_length: 128000
2352
+ client_spec:
2353
+ class_name: "helm.clients.together_client.TogetherChatClient"
2354
+
2028
2355
  - name: huggingface/qwen-vl
2029
2356
  model_name: qwen/qwen-vl
2030
2357
  tokenizer_name: qwen/qwen-vl
@@ -2038,3 +2365,53 @@ model_deployments:
2038
2365
  max_sequence_length: 8191
2039
2366
  client_spec:
2040
2367
  class_name: "helm.clients.vision_language.qwen_vlm_client.QwenVLMClient"
2368
+
2369
+ # Reka
2370
+ - name: reka/reka-core
2371
+ model_name: reka/reka-core
2372
+ tokenizer_name: openai/cl100k_base
2373
+ max_sequence_length: 128000
2374
+ client_spec:
2375
+ class_name: "helm.clients.reka_client.RekaClient"
2376
+
2377
+ - name: reka/reka-core-20240415
2378
+ model_name: reka/reka-core-20240415
2379
+ tokenizer_name: openai/cl100k_base
2380
+ max_sequence_length: 128000
2381
+ client_spec:
2382
+ class_name: "helm.clients.reka_client.RekaClient"
2383
+
2384
+ - name: reka/reka-core-20240501
2385
+ model_name: reka/reka-core-20240501
2386
+ tokenizer_name: openai/cl100k_base
2387
+ max_sequence_length: 128000
2388
+ client_spec:
2389
+ class_name: "helm.clients.reka_client.RekaClient"
2390
+
2391
+ - name: reka/reka-flash
2392
+ model_name: reka/reka-flash
2393
+ tokenizer_name: openai/cl100k_base
2394
+ max_sequence_length: 128000
2395
+ client_spec:
2396
+ class_name: "helm.clients.reka_client.RekaClient"
2397
+
2398
+ - name: reka/reka-flash-20240226
2399
+ model_name: reka/reka-flash-20240226
2400
+ tokenizer_name: openai/cl100k_base
2401
+ max_sequence_length: 128000
2402
+ client_spec:
2403
+ class_name: "helm.clients.reka_client.RekaClient"
2404
+
2405
+ - name: reka/reka-edge
2406
+ model_name: reka/reka-edge
2407
+ tokenizer_name: openai/cl100k_base
2408
+ max_sequence_length: 64000
2409
+ client_spec:
2410
+ class_name: "helm.clients.reka_client.RekaClient"
2411
+
2412
+ - name: reka/reka-edge-20240208
2413
+ model_name: reka/reka-edge-20240208
2414
+ tokenizer_name: openai/cl100k_base
2415
+ max_sequence_length: 64000
2416
+ client_spec:
2417
+ class_name: "helm.clients.reka_client.RekaClient"