crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (184) hide show
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  10. helm/benchmark/annotation/call_center_annotator.py +247 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +32 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +31 -44
  18. helm/benchmark/annotation/model_as_judge.py +45 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  20. helm/benchmark/annotation/xstest_annotator.py +110 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +57 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  30. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  31. helm/benchmark/model_metadata_registry.py +3 -3
  32. helm/benchmark/presentation/test_run_entry.py +1 -0
  33. helm/benchmark/run.py +15 -0
  34. helm/benchmark/run_expander.py +56 -30
  35. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  36. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  37. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  38. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  39. helm/benchmark/run_specs/finance_run_specs.py +78 -1
  40. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +92 -21
  42. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  43. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  44. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  45. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  46. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  47. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  48. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  49. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  50. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  51. helm/benchmark/scenarios/scenario.py +1 -1
  52. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  53. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  54. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  55. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  56. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  57. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  58. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  59. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  60. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  61. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  62. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  63. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  64. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  65. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  66. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  67. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  68. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  69. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  70. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  71. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  72. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  73. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  74. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  75. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  76. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  78. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  79. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  80. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  81. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  82. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  83. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  84. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  85. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  86. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  87. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  88. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  89. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  91. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  92. helm/benchmark/server.py +1 -6
  93. helm/benchmark/static/schema_air_bench.yaml +750 -750
  94. helm/benchmark/static/schema_bhasa.yaml +709 -0
  95. helm/benchmark/static/schema_call_center.yaml +232 -0
  96. helm/benchmark/static/schema_cleva.yaml +768 -0
  97. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  98. helm/benchmark/static/schema_ewok.yaml +367 -0
  99. helm/benchmark/static/schema_finance.yaml +55 -9
  100. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  101. helm/benchmark/static/schema_safety.yaml +247 -0
  102. helm/benchmark/static/schema_tables.yaml +124 -7
  103. helm/benchmark/static/schema_thai.yaml +21 -0
  104. helm/benchmark/static/schema_vhelm.yaml +96 -91
  105. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  106. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  107. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  108. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  109. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  110. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  111. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  112. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  113. helm/benchmark/static_build/index.html +2 -2
  114. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  115. helm/clients/ai21_client.py +71 -1
  116. helm/clients/anthropic_client.py +7 -19
  117. helm/clients/huggingface_client.py +38 -37
  118. helm/clients/nvidia_nim_client.py +35 -0
  119. helm/clients/openai_client.py +2 -3
  120. helm/clients/palmyra_client.py +25 -0
  121. helm/clients/perspective_api_client.py +11 -6
  122. helm/clients/test_client.py +4 -6
  123. helm/clients/vision_language/open_flamingo_client.py +1 -2
  124. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  125. helm/common/images_utils.py +6 -0
  126. helm/common/mongo_key_value_store.py +2 -1
  127. helm/common/request.py +16 -0
  128. helm/config/model_deployments.yaml +315 -332
  129. helm/config/model_metadata.yaml +384 -110
  130. helm/config/tokenizer_configs.yaml +116 -11
  131. helm/proxy/example_queries.py +14 -21
  132. helm/proxy/services/server_service.py +1 -2
  133. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  134. helm/tokenizers/ai21_tokenizer.py +51 -59
  135. helm/tokenizers/cohere_tokenizer.py +0 -75
  136. helm/tokenizers/huggingface_tokenizer.py +0 -1
  137. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  138. helm/benchmark/static/benchmarking.css +0 -156
  139. helm/benchmark/static/benchmarking.js +0 -1705
  140. helm/benchmark/static/config.js +0 -3
  141. helm/benchmark/static/general.js +0 -122
  142. helm/benchmark/static/images/crfm-logo.png +0 -0
  143. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  144. helm/benchmark/static/images/helm-logo.png +0 -0
  145. helm/benchmark/static/images/language-model-helm.png +0 -0
  146. helm/benchmark/static/images/organizations/ai21.png +0 -0
  147. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  148. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  149. helm/benchmark/static/images/organizations/cohere.png +0 -0
  150. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  151. helm/benchmark/static/images/organizations/google.png +0 -0
  152. helm/benchmark/static/images/organizations/meta.png +0 -0
  153. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  154. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  155. helm/benchmark/static/images/organizations/openai.png +0 -0
  156. helm/benchmark/static/images/organizations/together.png +0 -0
  157. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  158. helm/benchmark/static/images/organizations/yandex.png +0 -0
  159. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  160. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  161. helm/benchmark/static/index.html +0 -68
  162. helm/benchmark/static/info-icon.png +0 -0
  163. helm/benchmark/static/json-urls.js +0 -69
  164. helm/benchmark/static/plot-captions.js +0 -27
  165. helm/benchmark/static/utils.js +0 -285
  166. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  167. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  168. helm/benchmark/window_services/ai21_window_service.py +0 -247
  169. helm/benchmark/window_services/cohere_window_service.py +0 -101
  170. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  171. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  172. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  173. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  174. helm/tokenizers/ice_tokenizer.py +0 -30
  175. helm/tokenizers/test_ice_tokenizer.py +0 -57
  176. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  177. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  178. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  179. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  180. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  181. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  182. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  183. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  184. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -28,74 +28,47 @@ model_deployments:
28
28
 
29
29
  # AI21 Labs
30
30
 
31
- # J1 models are Deprecated by AI21 Labs
32
- # API returns: Detail: Jurassic J1 models are deprecated
33
- - name: ai21/j1-jumbo
34
- deprecated: true
35
- model_name: ai21/j1-jumbo
36
- tokenizer_name: ai21/j1
37
- max_sequence_length: 2047
38
- client_spec:
39
- class_name: "helm.clients.ai21_client.AI21Client"
40
- window_service_spec:
41
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
42
-
43
- - name: ai21/j1-large
44
- deprecated: true
45
- model_name: ai21/j1-large
46
- tokenizer_name: ai21/j1
31
+ - name: ai21/j2-large
32
+ model_name: ai21/j2-large
33
+ tokenizer_name: ai21/j2-tokenizer
47
34
  max_sequence_length: 2047
48
35
  client_spec:
49
36
  class_name: "helm.clients.ai21_client.AI21Client"
50
- window_service_spec:
51
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
52
37
 
53
- - name: ai21/j1-grande
54
- deprecated: true
55
- model_name: ai21/j1-grande
56
- tokenizer_name: ai21/j1
57
- max_sequence_length: 2047
58
- client_spec:
59
- class_name: "helm.clients.ai21_client.AI21Client"
60
- window_service_spec:
61
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
62
-
63
- - name: ai21/j1-grande-v2-beta
64
- deprecated: true
65
- model_name: ai21/j1-grande-v2-beta
66
- tokenizer_name: ai21/j1
38
+ - name: ai21/j2-grande
39
+ model_name: ai21/j2-grande
40
+ tokenizer_name: ai21/j2-tokenizer
67
41
  max_sequence_length: 2047
68
42
  client_spec:
69
43
  class_name: "helm.clients.ai21_client.AI21Client"
70
- window_service_spec:
71
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
72
44
 
73
45
  - name: ai21/j2-jumbo
74
46
  model_name: ai21/j2-jumbo
75
- tokenizer_name: ai21/j1
47
+ tokenizer_name: ai21/j2-tokenizer
76
48
  max_sequence_length: 6000
77
49
  client_spec:
78
50
  class_name: "helm.clients.ai21_client.AI21Client"
79
- window_service_spec:
80
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
81
51
 
82
- - name: ai21/j2-large
83
- model_name: ai21/j2-large
84
- tokenizer_name: ai21/j1
85
- max_sequence_length: 2047
52
+ - name: ai21/jamba-instruct
53
+ model_name: ai21/jamba-instruct
54
+ tokenizer_name: ai21/jamba-instruct-tokenizer
55
+ max_sequence_length: 256000
86
56
  client_spec:
87
- class_name: "helm.clients.ai21_client.AI21Client"
88
- window_service_spec:
89
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
57
+ class_name: "helm.clients.ai21_client.AI21ChatClient"
90
58
 
91
- - name: ai21/j2-grande
92
- model_name: ai21/j2-grande
93
- tokenizer_name: ai21/j1
94
- max_sequence_length: 2047
59
+ - name: ai21/jamba-1.5-mini
60
+ model_name: ai21/jamba-1.5-mini
61
+ tokenizer_name: ai21/jamba-1.5-mini-tokenizer
62
+ max_sequence_length: 256000
95
63
  client_spec:
96
- class_name: "helm.clients.ai21_client.AI21Client"
97
- window_service_spec:
98
- class_name: "helm.benchmark.window_services.ai21_window_service.AI21WindowService"
64
+ class_name: "helm.clients.ai21_client.AI21ChatClient"
65
+
66
+ - name: ai21/jamba-1.5-large
67
+ model_name: ai21/jamba-1.5-large
68
+ tokenizer_name: ai21/jamba-1.5-large-tokenizer
69
+ max_sequence_length: 256000
70
+ client_spec:
71
+ class_name: "helm.clients.ai21_client.AI21ChatClient"
99
72
 
100
73
  # Aleph Alpha
101
74
  - name: AlephAlpha/luminous-base
@@ -216,6 +189,13 @@ model_deployments:
216
189
  client_spec:
217
190
  class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
218
191
 
192
+ - name: anthropic/claude-3-5-sonnet-20240620
193
+ model_name: anthropic/claude-3-5-sonnet-20240620
194
+ tokenizer_name: anthropic/claude
195
+ max_sequence_length: 200000
196
+ client_spec:
197
+ class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
198
+
219
199
  - name: anthropic/stanford-online-all-v4-s3
220
200
  deprecated: true # Closed model, not accessible via API
221
201
  model_name: anthropic/stanford-online-all-v4-s3
@@ -225,86 +205,6 @@ model_deployments:
225
205
  class_name: "helm.clients.anthropic_client.AnthropicLegacyClient"
226
206
 
227
207
  # Cohere
228
- - name: cohere/xlarge-20220609
229
- model_name: cohere/xlarge-20220609
230
- tokenizer_name: cohere/cohere
231
- max_sequence_length: 2047
232
- max_request_length: 2048
233
- client_spec:
234
- class_name: "helm.clients.cohere_client.CohereClient"
235
- window_service_spec:
236
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
237
-
238
- - name: cohere/large-20220720
239
- model_name: cohere/large-20220720
240
- tokenizer_name: cohere/cohere
241
- max_sequence_length: 2047
242
- max_request_length: 2048
243
- client_spec:
244
- class_name: "helm.clients.cohere_client.CohereClient"
245
- window_service_spec:
246
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
247
-
248
- - name: cohere/medium-20220720
249
- model_name: cohere/medium-20220720
250
- tokenizer_name: cohere/cohere
251
- max_sequence_length: 2047
252
- max_request_length: 2048
253
- client_spec:
254
- class_name: "helm.clients.cohere_client.CohereClient"
255
- window_service_spec:
256
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
257
-
258
- - name: cohere/small-20220720
259
- model_name: cohere/small-20220720
260
- tokenizer_name: cohere/cohere
261
- max_sequence_length: 2047
262
- max_request_length: 2048
263
- client_spec:
264
- class_name: "helm.clients.cohere_client.CohereClient"
265
- window_service_spec:
266
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
267
-
268
- - name: cohere/xlarge-20221108
269
- model_name: cohere/xlarge-20221108
270
- tokenizer_name: cohere/cohere
271
- max_sequence_length: 2047
272
- max_request_length: 2048
273
- client_spec:
274
- class_name: "helm.clients.cohere_client.CohereClient"
275
- window_service_spec:
276
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
277
-
278
- - name: cohere/medium-20221108
279
- model_name: cohere/medium-20221108
280
- tokenizer_name: cohere/cohere
281
- max_sequence_length: 2047
282
- max_request_length: 2048
283
- client_spec:
284
- class_name: "helm.clients.cohere_client.CohereClient"
285
- window_service_spec:
286
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
287
-
288
- - name: cohere/command-medium-beta
289
- model_name: cohere/command-medium-beta
290
- tokenizer_name: cohere/cohere
291
- max_sequence_length: 2019
292
- max_request_length: 2020
293
- client_spec:
294
- class_name: "helm.clients.cohere_client.CohereClient"
295
- window_service_spec:
296
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
297
-
298
- - name: cohere/command-xlarge-beta
299
- model_name: cohere/command-xlarge-beta
300
- tokenizer_name: cohere/cohere
301
- max_sequence_length: 2019
302
- max_request_length: 2020
303
- client_spec:
304
- class_name: "helm.clients.cohere_client.CohereClient"
305
- window_service_spec:
306
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
307
-
308
208
  - name: cohere/command
309
209
  model_name: cohere/command
310
210
  tokenizer_name: cohere/command
@@ -312,8 +212,6 @@ model_deployments:
312
212
  max_request_length: 2020 # TODO: verify this
313
213
  client_spec:
314
214
  class_name: "helm.clients.cohere_client.CohereClient"
315
- window_service_spec:
316
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
317
215
 
318
216
  - name: cohere/command-light
319
217
  model_name: cohere/command-light
@@ -322,8 +220,6 @@ model_deployments:
322
220
  max_request_length: 2020 # TODO: verify this
323
221
  client_spec:
324
222
  class_name: "helm.clients.cohere_client.CohereClient"
325
- window_service_spec:
326
- class_name: "helm.benchmark.window_services.cohere_window_service.CohereWindowService"
327
223
 
328
224
  - name: cohere/command-r
329
225
  model_name: cohere/command-r
@@ -587,6 +483,20 @@ model_deployments:
587
483
  client_spec:
588
484
  class_name: "helm.clients.together_client.TogetherClient"
589
485
 
486
+ - name: together/gemma-2-9b-it
487
+ model_name: google/gemma-2-9b-it
488
+ tokenizer_name: google/gemma-2-9b
489
+ max_sequence_length: 8191
490
+ client_spec:
491
+ class_name: "helm.clients.together_client.TogetherClient"
492
+
493
+ - name: together/gemma-2-27b-it
494
+ model_name: google/gemma-2-27b-it
495
+ tokenizer_name: google/gemma-2-9b
496
+ max_sequence_length: 8191
497
+ client_spec:
498
+ class_name: "helm.clients.together_client.TogetherClient"
499
+
590
500
  ## MedLM
591
501
  - name: google/medlm-medium
592
502
  model_name: google/medlm-medium
@@ -807,6 +717,47 @@ model_deployments:
807
717
  client_spec:
808
718
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
809
719
 
720
+ ## Google
721
+ - name: huggingface/gemma-2-9b
722
+ model_name: google/gemma-2-9b
723
+ tokenizer_name: google/gemma-2-9b
724
+ max_sequence_length: 8192
725
+ client_spec:
726
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
727
+ args:
728
+ device_map: auto
729
+ torch_dtype: torch.bfloat16
730
+
731
+ - name: huggingface/gemma-2-9b-it
732
+ model_name: google/gemma-2-9b-it
733
+ tokenizer_name: google/gemma-2-9b
734
+ max_sequence_length: 8192
735
+ client_spec:
736
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
737
+ args:
738
+ device_map: auto
739
+ torch_dtype: torch.bfloat16
740
+
741
+ - name: huggingface/gemma-2-27b
742
+ model_name: google/gemma-2-27b
743
+ tokenizer_name: google/gemma-2-9b
744
+ max_sequence_length: 8192
745
+ client_spec:
746
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
747
+ args:
748
+ device_map: auto
749
+ torch_dtype: torch.bfloat16
750
+
751
+ - name: huggingface/gemma-2-27b-it
752
+ model_name: google/gemma-2-27b-it
753
+ tokenizer_name: google/gemma-2-9b
754
+ max_sequence_length: 8192
755
+ client_spec:
756
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
757
+ args:
758
+ device_map: auto
759
+ torch_dtype: torch.bfloat16
760
+
810
761
  ## LMSYS
811
762
  - name: huggingface/vicuna-7b-v1.3
812
763
  model_name: lmsys/vicuna-7b-v1.3
@@ -931,6 +882,7 @@ model_deployments:
931
882
  tokenizer_name: "anas-awadalla-2/mpt-7b"
932
883
  cross_attn_every_n_layers: 4
933
884
 
885
+ ## Microsoft
934
886
  - name: together/phi-2
935
887
  model_name: microsoft/phi-2
936
888
  tokenizer_name: microsoft/phi-2
@@ -938,6 +890,26 @@ model_deployments:
938
890
  client_spec:
939
891
  class_name: "helm.clients.together_client.TogetherClient"
940
892
 
893
+ - name: huggingface/phi-3-small-8k-instruct
894
+ model_name: microsoft/phi-3-small-8k-instruct
895
+ tokenizer_name: microsoft/phi-3-small-8k-instruct
896
+ max_sequence_length: 8192
897
+ client_spec:
898
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
899
+ args:
900
+ torch_dtype: auto
901
+ trust_remote_code: true
902
+
903
+ - name: huggingface/phi-3-medium-4k-instruct
904
+ model_name: microsoft/phi-3-medium-4k-instruct
905
+ tokenizer_name: microsoft/phi-3-medium-4k-instruct
906
+ max_sequence_length: 4096
907
+ client_spec:
908
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
909
+ args:
910
+ device_map: auto
911
+ torch_dtype: auto
912
+
941
913
  ## Mistral AI
942
914
  - name: huggingface/bakLlava-v1-hf
943
915
  model_name: mistralai/bakLlava-v1-hf
@@ -992,42 +964,134 @@ model_deployments:
992
964
  args:
993
965
  pretrained_model_name_or_path: openai-community/gpt2
994
966
 
967
+ ## OpenThaiGPT
968
+ - name: huggingface/openthaigpt-1.0.0-7b-chat
969
+ model_name: openthaigpt/openthaigpt-1.0.0-7b-chat
970
+ tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
971
+ max_sequence_length: 4096
972
+ client_spec:
973
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
974
+
975
+ - name: huggingface/openthaigpt-1.0.0-13b-chat
976
+ model_name: openthaigpt/openthaigpt-1.0.0-13b-chat
977
+ tokenizer_name: openthaigpt/openthaigpt-1.0.0-7b-chat
978
+ max_sequence_length: 4096
979
+ client_spec:
980
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
981
+ args:
982
+ device_map: auto
983
+
984
+ - name: huggingface/openthaigpt-1.0.0-70b-chat
985
+ model_name: openthaigpt/openthaigpt-1.0.0-70b-chat
986
+ tokenizer_name: huggingface/openthaigpt-1.0.0-7b-chat
987
+ max_sequence_length: 4096
988
+ client_spec:
989
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
990
+ args:
991
+ device_map: auto
992
+
995
993
  ## SAIL (SEA AI Lab)
996
- - name: sail/sailor-7b
994
+ - name: huggingface/sailor-7b
997
995
  model_name: sail/sailor-7b
998
996
  tokenizer_name: qwen/qwen1.5-7b
999
997
  max_sequence_length: 32768
1000
998
  client_spec:
1001
999
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1002
1000
 
1003
- - name: sail/sailor-7b-chat
1001
+ - name: huggingface/sailor-7b-chat
1004
1002
  model_name: sail/sailor-7b-chat
1005
1003
  tokenizer_name: qwen/qwen1.5-7b
1006
1004
  max_sequence_length: 32768
1007
1005
  client_spec:
1008
1006
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1009
1007
 
1010
- - name: sail/sailor-14b
1008
+ - name: huggingface/sailor-14b
1011
1009
  model_name: sail/sailor-14b
1012
1010
  tokenizer_name: qwen/qwen1.5-7b
1013
1011
  max_sequence_length: 32768
1014
1012
  client_spec:
1015
1013
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1014
+ args:
1015
+ device_map: auto
1016
1016
 
1017
- - name: sail/sailor-14b-chat
1017
+ - name: huggingface/sailor-14b-chat
1018
1018
  model_name: sail/sailor-14b-chat
1019
1019
  tokenizer_name: qwen/qwen1.5-7b
1020
1020
  max_sequence_length: 32768
1021
1021
  client_spec:
1022
1022
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1023
+ args:
1024
+ device_map: auto
1025
+
1026
+ # SambaNova
1027
+ - name: huggingface/sambalingo-thai-base
1028
+ model_name: sambanova/sambalingo-thai-base
1029
+ tokenizer_name: sambanova/sambalingo-thai-base
1030
+ max_sequence_length: 4096
1031
+ client_spec:
1032
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1033
+ args:
1034
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
1035
+
1036
+ - name: huggingface/sambalingo-thai-chat
1037
+ model_name: sambanova/sambalingo-thai-chat
1038
+ tokenizer_name: sambanova/sambalingo-thai-base
1039
+ max_sequence_length: 4096
1040
+ client_spec:
1041
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1042
+ args:
1043
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
1044
+
1045
+ - name: huggingface/sambalingo-thai-base-70b
1046
+ model_name: sambanova/sambalingo-thai-base-70b
1047
+ tokenizer_name: sambanova/sambalingo-thai-base
1048
+ max_sequence_length: 4096
1049
+ client_spec:
1050
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1051
+ args:
1052
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
1053
+ device_map: auto
1054
+
1055
+ - name: huggingface/sambalingo-thai-chat-70b
1056
+ model_name: sambanova/sambalingo-thai-chat-70b
1057
+ tokenizer_name: sambanova/sambalingo-thai-base
1058
+ max_sequence_length: 4096
1059
+ client_spec:
1060
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1061
+ args:
1062
+ pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base-70B
1063
+ device_map: auto
1023
1064
 
1024
1065
  ## SCB10X
1066
+ - name: huggingface/typhoon-7b
1067
+ model_name: scb10x/typhoon-7b
1068
+ tokenizer_name: scb10x/typhoon-7b
1069
+ max_sequence_length: 4096
1070
+ client_spec:
1071
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1072
+
1073
+ - name: huggingface/typhoon-v1.5-8b
1074
+ model_name: scb10x/typhoon-v1.5-8b
1075
+ tokenizer_name: meta/llama-3-8b
1076
+ max_sequence_length: 8192
1077
+ client_spec:
1078
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1079
+
1080
+ - name: huggingface/typhoon-v1.5-8b-instruct
1081
+ model_name: scb10x/typhoon-v1.5-8b-instruct
1082
+ tokenizer_name: meta/llama-3-8b
1083
+ max_sequence_length: 8192
1084
+ client_spec:
1085
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1086
+
1025
1087
  - name: huggingface/typhoon-v1.5-72b
1026
1088
  model_name: scb10x/typhoon-v1.5-72b
1027
1089
  tokenizer_name: qwen/qwen1.5-7b
1028
1090
  max_sequence_length: 32768
1029
1091
  client_spec:
1030
1092
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1093
+ args:
1094
+ device_map: auto
1031
1095
 
1032
1096
  - name: huggingface/typhoon-v1.5-72b-instruct
1033
1097
  model_name: scb10x/typhoon-v1.5-72b-instruct
@@ -1035,6 +1099,43 @@ model_deployments:
1035
1099
  max_sequence_length: 32768
1036
1100
  client_spec:
1037
1101
  class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1102
+ args:
1103
+ device_map: auto
1104
+
1105
+ - name: huggingface/llama-3-typhoon-v1.5x-8b-instruct
1106
+ model_name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
1107
+ tokenizer_name: meta/llama-3-8b
1108
+ max_sequence_length: 8192
1109
+ client_spec:
1110
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1111
+
1112
+ - name: huggingface/llama-3-typhoon-v1.5x-70b-instruct
1113
+ model_name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
1114
+ tokenizer_name: meta/llama-3-8b
1115
+ max_sequence_length: 8192
1116
+ client_spec:
1117
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1118
+ args:
1119
+ device_map: auto
1120
+
1121
+ # Alibaba DAMO Academy
1122
+ - name: huggingface/seallm-7b-v2
1123
+ model_name: damo/seallm-7b-v2
1124
+ tokenizer_name: damo/seallm-7b-v2
1125
+ max_sequence_length: 4096
1126
+ client_spec:
1127
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1128
+ args:
1129
+ pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2
1130
+
1131
+ - name: huggingface/seallm-7b-v2.5
1132
+ model_name: damo/seallm-7b-v2.5
1133
+ tokenizer_name: damo/seallm-7b-v2.5
1134
+ max_sequence_length: 4096
1135
+ client_spec:
1136
+ class_name: "helm.clients.huggingface_client.HuggingFaceClient"
1137
+ args:
1138
+ pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5
1038
1139
 
1039
1140
  ## StabilityAI
1040
1141
  - name: huggingface/stablelm-base-alpha-3b
@@ -1315,6 +1416,20 @@ model_deployments:
1315
1416
  client_spec:
1316
1417
  class_name: "helm.clients.mistral_client.MistralAIClient"
1317
1418
 
1419
+ - name: mistralai/mistral-large-2407
1420
+ model_name: mistralai/mistral-large-2407
1421
+ tokenizer_name: mistralai/Mistral-Large-Instruct-2407
1422
+ max_sequence_length: 128000
1423
+ client_spec:
1424
+ class_name: "helm.clients.mistral_client.MistralAIClient"
1425
+
1426
+ - name: mistralai/open-mistral-nemo-2407
1427
+ model_name: mistralai/open-mistral-nemo-2407
1428
+ tokenizer_name: mistralai/Mistral-Nemo-Base-2407
1429
+ max_sequence_length: 128000
1430
+ client_spec:
1431
+ class_name: "helm.clients.mistral_client.MistralAIClient"
1432
+
1318
1433
  # Neurips
1319
1434
  - name: neurips/local
1320
1435
  model_name: neurips/local
@@ -1331,6 +1446,13 @@ model_deployments:
1331
1446
  client_spec:
1332
1447
  class_name: "helm.clients.megatron_client.MegatronClient"
1333
1448
 
1449
+ - name: nvidia/nemotron-4-340b-instruct
1450
+ model_name: nvidia/nemotron-4-340b-instruct
1451
+ tokenizer_name: nvidia/nemotron-4-340b-instruct
1452
+ max_sequence_length: 4085
1453
+ client_spec:
1454
+ class_name: "helm.clients.nvidia_nim_client.NvidiaNimClient"
1455
+
1334
1456
  # OpenAI
1335
1457
 
1336
1458
  ## GPT 3 Models
@@ -1353,99 +1475,6 @@ model_deployments:
1353
1475
  client_spec:
1354
1476
  class_name: "helm.clients.openai_client.OpenAIClient"
1355
1477
 
1356
- # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
1357
- # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
1358
-
1359
- - name: openai/davinci
1360
- deprecated: true
1361
- model_name: openai/davinci
1362
- tokenizer_name: huggingface/gpt2
1363
- max_sequence_length: 2048
1364
- max_request_length: 2049
1365
- client_spec:
1366
- class_name: "helm.clients.openai_client.OpenAIClient"
1367
-
1368
- - name: openai/curie
1369
- deprecated: true
1370
- model_name: openai/curie
1371
- tokenizer_name: huggingface/gpt2
1372
- max_sequence_length: 2048
1373
- max_request_length: 2049
1374
- client_spec:
1375
- class_name: "helm.clients.openai_client.OpenAIClient"
1376
-
1377
- - name: openai/babbage
1378
- deprecated: true
1379
- model_name: openai/babbage
1380
- tokenizer_name: huggingface/gpt2
1381
- max_sequence_length: 2048
1382
- max_request_length: 2049
1383
- client_spec:
1384
- class_name: "helm.clients.openai_client.OpenAIClient"
1385
-
1386
- - name: openai/ada
1387
- deprecated: true
1388
- model_name: openai/ada
1389
- tokenizer_name: huggingface/gpt2
1390
- max_sequence_length: 2048
1391
- max_request_length: 2049
1392
- client_spec:
1393
- class_name: "helm.clients.openai_client.OpenAIClient"
1394
-
1395
- - name: openai/text-davinci-003
1396
- deprecated: true
1397
- model_name: openai/text-davinci-003
1398
- tokenizer_name: huggingface/gpt2
1399
- max_sequence_length: 4000
1400
- max_request_length: 4001
1401
- client_spec:
1402
- class_name: "helm.clients.openai_client.OpenAIClient"
1403
-
1404
- - name: openai/text-davinci-002
1405
- deprecated: true
1406
- model_name: openai/text-davinci-002
1407
- tokenizer_name: huggingface/gpt2
1408
- max_sequence_length: 4000
1409
- max_request_length: 4001
1410
- client_spec:
1411
- class_name: "helm.clients.openai_client.OpenAIClient"
1412
-
1413
- - name: openai/text-davinci-001
1414
- deprecated: true
1415
- model_name: openai/text-davinci-001
1416
- tokenizer_name: huggingface/gpt2
1417
- max_sequence_length: 2048
1418
- max_request_length: 2049
1419
- client_spec:
1420
- class_name: "helm.clients.openai_client.OpenAIClient"
1421
-
1422
- - name: openai/text-curie-001
1423
- deprecated: true
1424
- model_name: openai/text-curie-001
1425
- tokenizer_name: huggingface/gpt2
1426
- max_sequence_length: 2048
1427
- max_request_length: 2049
1428
- client_spec:
1429
- class_name: "helm.clients.openai_client.OpenAIClient"
1430
-
1431
- - name: openai/text-babbage-001
1432
- deprecated: true
1433
- model_name: openai/text-babbage-001
1434
- tokenizer_name: huggingface/gpt2
1435
- max_sequence_length: 2048
1436
- max_request_length: 2049
1437
- client_spec:
1438
- class_name: "helm.clients.openai_client.OpenAIClient"
1439
-
1440
- - name: openai/text-ada-001
1441
- deprecated: true
1442
- model_name: openai/text-ada-001
1443
- tokenizer_name: huggingface/gpt2
1444
- max_sequence_length: 2048
1445
- max_request_length: 2049
1446
- client_spec:
1447
- class_name: "helm.clients.openai_client.OpenAIClient"
1448
-
1449
1478
  ## GPT 3.5 Turbo Models
1450
1479
  # ChatGPT: https://openai.com/blog/chatgpt
1451
1480
 
@@ -1580,6 +1609,20 @@ model_deployments:
1580
1609
  client_spec:
1581
1610
  class_name: "helm.clients.openai_client.OpenAIClient"
1582
1611
 
1612
+ - name: openai/gpt-4o-2024-08-06
1613
+ model_name: openai/gpt-4o-2024-08-06
1614
+ tokenizer_name: openai/o200k_base
1615
+ max_sequence_length: 128000
1616
+ client_spec:
1617
+ class_name: "helm.clients.openai_client.OpenAIClient"
1618
+
1619
+ - name: openai/gpt-4o-mini-2024-07-18
1620
+ model_name: openai/gpt-4o-mini-2024-07-18
1621
+ tokenizer_name: openai/o200k_base
1622
+ max_sequence_length: 128000
1623
+ client_spec:
1624
+ class_name: "helm.clients.openai_client.OpenAIClient"
1625
+
1583
1626
  - name: openai/gpt-4-vision-preview
1584
1627
  model_name: openai/gpt-4-vision-preview
1585
1628
  tokenizer_name: openai/cl100k_base
@@ -1598,78 +1641,10 @@ model_deployments:
1598
1641
  client_spec:
1599
1642
  class_name: "helm.clients.openai_client.OpenAIClient"
1600
1643
 
1601
- ## Codex Models
1602
- # DEPRECATED: Codex models have been shut down on March 23 2023.
1603
-
1604
- - name: openai/code-davinci-002
1605
- deprecated: true
1606
- model_name: openai/code-davinci-002
1607
- tokenizer_name: huggingface/gpt2
1608
- max_sequence_length: 4000
1609
- max_request_length: 4001
1610
- client_spec:
1611
- class_name: "helm.clients.openai_client.OpenAIClient"
1612
-
1613
- - name: openai/code-davinci-001
1614
- deprecated: true
1615
- model_name: openai/code-davinci-001
1616
- tokenizer_name: huggingface/gpt2
1617
- max_sequence_length: 2048
1618
- max_request_length: 2049
1619
- client_spec:
1620
- class_name: "helm.clients.openai_client.OpenAIClient"
1621
-
1622
- - name: openai/code-cushman-001
1623
- deprecated: true
1624
- model_name: openai/code-cushman-001
1625
- tokenizer_name: huggingface/gpt2
1626
- max_sequence_length: 2048
1627
- max_request_length: 2049
1628
- client_spec:
1629
- class_name: "helm.clients.openai_client.OpenAIClient"
1630
-
1631
1644
  ## Text Similarity Models
1632
1645
  # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
1633
1646
  # The number of parameters is guessed based on the number of parameters of the
1634
1647
  # corresponding GPT-3 model.
1635
- # DEPRECATED: Announced on July 06 2023 that first generation embeddings models
1636
- # will be shut down on January 04 2024.
1637
-
1638
- - name: openai/text-similarity-davinci-001
1639
- deprecated: true
1640
- model_name: openai/text-similarity-davinci-001
1641
- tokenizer_name: huggingface/gpt2
1642
- max_sequence_length: 2048
1643
- max_request_length: 2049
1644
- client_spec:
1645
- class_name: "helm.clients.openai_client.OpenAIClient"
1646
-
1647
- - name: openai/text-similarity-curie-001
1648
- deprecated: true
1649
- model_name: openai/text-similarity-curie-001
1650
- tokenizer_name: huggingface/gpt2
1651
- max_sequence_length: 2048
1652
- max_request_length: 2049
1653
- client_spec:
1654
- class_name: "helm.clients.openai_client.OpenAIClient"
1655
-
1656
- - name: openai/text-similarity-babbage-001
1657
- deprecated: true
1658
- model_name: openai/text-similarity-babbage-001
1659
- tokenizer_name: huggingface/gpt2
1660
- max_sequence_length: 2048
1661
- max_request_length: 2049
1662
- client_spec:
1663
- class_name: "helm.clients.openai_client.OpenAIClient"
1664
-
1665
- - name: openai/text-similarity-ada-001
1666
- deprecated: true
1667
- model_name: openai/text-similarity-ada-001
1668
- tokenizer_name: huggingface/gpt2
1669
- max_sequence_length: 2048
1670
- max_request_length: 2049
1671
- client_spec:
1672
- class_name: "helm.clients.openai_client.OpenAIClient"
1673
1648
 
1674
1649
  # As of 2023-11-07, text-embedding-ada-002 is not deprecated:
1675
1650
  # "We recommend using text-embedding-ada-002 for nearly all use cases."
@@ -1867,10 +1842,37 @@ model_deployments:
1867
1842
  args:
1868
1843
  together_model: meta-llama/Meta-Llama-3-70B
1869
1844
 
1845
+ - name: together/llama-3.1-8b-instruct-turbo
1846
+ model_name: meta/llama-3.1-8b-instruct-turbo
1847
+ tokenizer_name: meta/llama-3.1-8b
1848
+ max_sequence_length: 128000
1849
+ client_spec:
1850
+ class_name: "helm.clients.together_client.TogetherChatClient"
1851
+ args:
1852
+ together_model: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
1853
+
1854
+ - name: together/llama-3.1-70b-instruct-turbo
1855
+ model_name: meta/llama-3.1-70b-instruct-turbo
1856
+ tokenizer_name: meta/llama-3.1-8b
1857
+ max_sequence_length: 128000
1858
+ client_spec:
1859
+ class_name: "helm.clients.together_client.TogetherChatClient"
1860
+ args:
1861
+ together_model: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
1862
+
1863
+ - name: together/llama-3.1-405b-instruct-turbo
1864
+ model_name: meta/llama-3.1-405b-instruct-turbo
1865
+ tokenizer_name: meta/llama-3.1-8b
1866
+ max_sequence_length: 128000
1867
+ client_spec:
1868
+ class_name: "helm.clients.together_client.TogetherChatClient"
1869
+ args:
1870
+ together_model: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
1871
+
1870
1872
  - name: together/llama-3-8b-chat
1871
1873
  model_name: meta/llama-3-8b-chat
1872
1874
  tokenizer_name: meta/llama-3-8b
1873
- max_sequence_length: 8191
1875
+ max_sequence_length: 8182
1874
1876
  client_spec:
1875
1877
  class_name: "helm.clients.together_client.TogetherChatClient"
1876
1878
  args:
@@ -1879,7 +1881,7 @@ model_deployments:
1879
1881
  - name: together/llama-3-70b-chat
1880
1882
  model_name: meta/llama-3-70b-chat
1881
1883
  tokenizer_name: meta/llama-3-8b
1882
- max_sequence_length: 8191
1884
+ max_sequence_length: 8182
1883
1885
  client_spec:
1884
1886
  class_name: "helm.clients.together_client.TogetherChatClient"
1885
1887
  args:
@@ -1903,6 +1905,15 @@ model_deployments:
1903
1905
  args:
1904
1906
  together_model: meta-llama/llamaguard-2-8b
1905
1907
 
1908
+ - name: together/llama-guard-3-8b
1909
+ model_name: meta/llama-guard-3-8b
1910
+ tokenizer_name: meta/llama-3.1-8b
1911
+ max_sequence_length: 128000
1912
+ client_spec:
1913
+ class_name: "helm.clients.together_client.TogetherClient"
1914
+ args:
1915
+ together_model: meta-llama/Meta-Llama-Guard-3-8B
1916
+
1906
1917
  # 01.AI
1907
1918
  - name: together/yi-6b
1908
1919
  model_name: 01-ai/yi-6b
@@ -2029,7 +2040,7 @@ model_deployments:
2029
2040
  - name: together/mixtral-8x7b-instruct-v0.1
2030
2041
  model_name: mistralai/mixtral-8x7b-instruct-v0.1
2031
2042
  tokenizer_name: mistralai/Mistral-7B-v0.1
2032
- max_sequence_length: 4095 # Subtract 1 token to work around a off-by-one bug in Together's input validation token counting (#2080)
2043
+ max_sequence_length: 32767
2033
2044
  client_spec:
2034
2045
  class_name: "helm.clients.together_client.TogetherChatClient"
2035
2046
 
@@ -2165,18 +2176,6 @@ model_deployments:
2165
2176
  args:
2166
2177
  together_model: togethercomputer/RedPajama-INCITE-7B-Instruct
2167
2178
 
2168
- ## Tsinghua
2169
- - name: together/glm
2170
- deprecated: true # Removed from Together
2171
- model_name: tsinghua/glm
2172
- tokenizer_name: TsinghuaKEG/ice
2173
- max_sequence_length: 2048
2174
- max_request_length: 2049
2175
- client_spec:
2176
- class_name: "helm.clients.together_client.TogetherClient"
2177
- window_service_spec:
2178
- class_name: "helm.benchmark.window_services.ice_window_service.ICEWindowService"
2179
-
2180
2179
  - name: thudm/cogview2
2181
2180
  model_name: thudm/cogview2
2182
2181
  tokenizer_name: openai/clip-vit-large-patch14
@@ -2215,22 +2214,6 @@ model_deployments:
2215
2214
  client_spec:
2216
2215
  class_name: "helm.clients.palmyra_client.PalmyraClient"
2217
2216
 
2218
- - name: writer/palmyra-instruct-30
2219
- model_name: writer/palmyra-instruct-30
2220
- tokenizer_name: writer/gpt2
2221
- max_sequence_length: 2048
2222
- max_sequence_and_generated_tokens_length: 2048
2223
- client_spec:
2224
- class_name: "helm.clients.palmyra_client.PalmyraClient"
2225
-
2226
- - name: writer/palmyra-e
2227
- model_name: writer/palmyra-e
2228
- tokenizer_name: writer/gpt2
2229
- max_sequence_length: 2048
2230
- max_sequence_and_generated_tokens_length: 2048
2231
- client_spec:
2232
- class_name: "helm.clients.palmyra_client.PalmyraClient"
2233
-
2234
2217
  - name: writer/silk-road
2235
2218
  model_name: writer/silk-road
2236
2219
  tokenizer_name: writer/gpt2
@@ -2431,4 +2414,4 @@ model_deployments:
2431
2414
  tokenizer_name: openai/cl100k_base
2432
2415
  max_sequence_length: 64000
2433
2416
  client_spec:
2434
- class_name: "helm.clients.reka_client.RekaClient"
2417
+ class_name: "helm.clients.reka_client.RekaClient"