crfm-helm 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic.
- {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +3 -1
- {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +117 -115
- helm/benchmark/adaptation/adapter_spec.py +5 -0
- helm/benchmark/metrics/bbq_metrics.py +12 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +12 -0
- helm/benchmark/metrics/safety_metrics.py +13 -1
- helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
- helm/benchmark/presentation/run_display.py +13 -3
- helm/benchmark/presentation/run_entry.py +2 -2
- helm/benchmark/run.py +1 -1
- helm/benchmark/run_specs/arabic_run_specs.py +6 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +2 -2
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
- helm/benchmark/scenarios/banking77_scenario.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
- helm/benchmark/scenarios/commonsense_scenario.py +7 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
- helm/benchmark/scenarios/financebench_scenario.py +21 -0
- helm/benchmark/scenarios/gsm_scenario.py +9 -3
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
- helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
- helm/benchmark/scenarios/legalbench_scenario.py +6 -7
- helm/benchmark/scenarios/math_scenario.py +11 -4
- helm/benchmark/scenarios/med_qa_scenario.py +7 -1
- helm/benchmark/scenarios/medi_qa_scenario.py +2 -2
- helm/benchmark/scenarios/mmlu_scenario.py +8 -2
- helm/benchmark/scenarios/narrativeqa_scenario.py +3 -4
- helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
- helm/benchmark/scenarios/spider_scenario.py +18 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +9 -2
- helm/benchmark/static/schema_long_context.yaml +12 -31
- helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
- helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
- helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
- helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
- helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
- helm/benchmark/static_build/index.html +5 -6
- helm/clients/ai21_client.py +2 -0
- helm/clients/aleph_alpha_client.py +2 -0
- helm/clients/anthropic_client.py +7 -1
- helm/clients/audio_language/diva_llama_client.py +2 -0
- helm/clients/audio_language/llama_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_audiolm_client.py +2 -1
- helm/clients/bedrock_client.py +2 -0
- helm/clients/cohere_client.py +3 -0
- helm/clients/google_client.py +2 -0
- helm/clients/http_model_client.py +2 -0
- helm/clients/huggingface_client.py +2 -1
- helm/clients/ibm_client.py +3 -1
- helm/clients/image_generation/adobe_vision_client.py +2 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
- helm/clients/image_generation/cogview2_client.py +2 -1
- helm/clients/image_generation/dalle2_client.py +2 -0
- helm/clients/image_generation/dalle_mini_client.py +2 -1
- helm/clients/image_generation/deep_floyd_client.py +2 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
- helm/clients/image_generation/lexica_client.py +2 -0
- helm/clients/image_generation/mindalle_client.py +2 -1
- helm/clients/image_generation/together_image_generation_client.py +2 -0
- helm/clients/megatron_client.py +2 -0
- helm/clients/mistral_client.py +2 -0
- helm/clients/moderation_api_client.py +2 -0
- helm/clients/openai_client.py +5 -1
- helm/clients/palmyra_client.py +2 -1
- helm/clients/reka_client.py +2 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
- helm/clients/stanfordhealthcare_http_model_client.py +2 -0
- helm/clients/together_client.py +4 -0
- helm/clients/vertexai_client.py +4 -0
- helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
- helm/clients/vision_language/huggingface_vlm_client.py +2 -0
- helm/clients/vision_language/idefics_client.py +2 -1
- helm/clients/vision_language/open_flamingo_client.py +2 -1
- helm/clients/vision_language/paligemma_client.py +2 -1
- helm/clients/vision_language/palmyra_vision_client.py +2 -0
- helm/clients/vision_language/qwen2_vlm_client.py +2 -1
- helm/clients/vision_language/qwen_vlm_client.py +2 -1
- helm/clients/writer_client.py +2 -0
- helm/common/hierarchical_logger.py +20 -0
- helm/common/optional_dependencies.py +1 -1
- helm/common/test_general.py +4 -0
- helm/config/model_deployments.yaml +225 -0
- helm/config/model_metadata.yaml +232 -7
- helm/config/tokenizer_configs.yaml +74 -4
- helm/benchmark/static_build/assets/index-671a5e06.js +0 -10
- helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
- helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
- helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
- {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
- /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
- /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
- /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
- /helm/benchmark/static_build/assets/{index-9352595e.css → index-oIeiQW2g.css} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
- /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
- /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
helm/config/model_metadata.yaml
CHANGED
@@ -278,7 +278,7 @@ models:
   # https://aws.amazon.com/ai/generative-ai/nova/
   - name: amazon/nova-premier-v1:0
     display_name: Amazon Nova Premier
-    description: Amazon Nova Premier is
+    description: Amazon Nova Premier is a capable multimodal foundation model and teacher for model distillation that processes text, images, and videos with a one-million token context window. ([model card](https://www.amazon.science/publications/amazon-nova-premier-technical-report-and-model-card), [blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
     creator_organization_name: Amazon
     access: limited
     release_date: 2025-04-30
@@ -286,7 +286,7 @@ models:
 
   - name: amazon/nova-pro-v1:0
     display_name: Amazon Nova Pro
-    description: Amazon Nova Pro
+    description: Amazon Nova Pro is a highly capable multimodal model that balances of accuracy, speed, and cost for a wide range of tasks ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
     creator_organization_name: Amazon
     access: limited
     release_date: 2024-12-03
@@ -294,7 +294,7 @@ models:
 
   - name: amazon/nova-lite-v1:0
     display_name: Amazon Nova Lite
-    description: Amazon Nova Lite
+    description: Amazon Nova Lite is a low-cost multimodal model that is fast for processing images, video, documents and text. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
     creator_organization_name: Amazon
     access: limited
     release_date: 2024-12-03
@@ -302,7 +302,7 @@ models:
 
   - name: amazon/nova-micro-v1:0
     display_name: Amazon Nova Micro
-    description: Amazon Nova Micro
+    description: Amazon Nova Micro is a text-only model that delivers low-latency responses at low cost. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
     creator_organization_name: Amazon
     access: limited
     release_date: 2024-12-03
@@ -555,6 +555,14 @@ models:
     release_date: 2025-05-14
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: anthropic/claude-sonnet-4-5-20250929
+    display_name: Claude 4.5 Sonnet (20250929)
+    description: Claude 4.5 Sonnet is a model from Anthropic that shows particular strengths in software coding, in agentic tasks where it runs in a loop and uses tools, and in using computers. ([blog](https://www.anthropic.com/news/claude-sonnet-4-5), [system card](https://assets.anthropic.com/m/12f214efcc2f457a/original/Claude-Sonnet-4-5-System-Card.pdf))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-09-29
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: anthropic/stanford-online-all-v4-s3
     display_name: Anthropic-LM v4-s3 (52B)
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -946,6 +954,24 @@ models:
     release_date: 2025-01-20
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: deepseek-ai/deepseek-r1-distill-llama-70b
+    display_name: DeepSeek-R1-Distill-Llama-70B
+    description: DeepSeek-R1-Distill-Llama-70B is a fine-tuned open-source models based on Llama-3.3-70B-Instruct using samples generated by DeepSeek-R1.
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 70600000000
+    release_date: 2025-01-20
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/deepseek-r1-distill-qwen-14b
+    display_name: DeepSeek-R1-Distill-Qwen-14B
+    description: DeepSeek-R1-Distill-Qwen-14B is a fine-tuned open-source models based on Qwen2.5-14B using samples generated by DeepSeek-R1.
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 14800000000
+    release_date: 2025-01-20
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: deepseek-ai/deepseek-coder-6.7b-instruct
     display_name: DeepSeek-Coder-6.7b-Instruct
     description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
@@ -1207,7 +1233,7 @@ models:
 
   - name: google/gemini-2.0-flash-001
     display_name: Gemini 2.0 Flash
-    description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
     creator_organization_name: Google
     access: limited
     release_date: 2025-02-01
@@ -1215,7 +1241,7 @@ models:
 
   - name: google/gemini-2.0-flash-lite-preview-02-05
     display_name: Gemini 2.0 Flash Lite (02-05 preview)
-    description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    description: Gemini 2.0 Flash Lite (02-05 preview) ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
     creator_organization_name: Google
     access: limited
     release_date: 2025-02-05
@@ -1223,7 +1249,7 @@ models:
 
   - name: google/gemini-2.0-flash-lite-001
     display_name: Gemini 2.0 Flash Lite
-    description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    description: Gemini 2.0 Flash Lite is the fastest and most cost efficient Flash model in the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
     creator_organization_name: Google
     access: limited
     release_date: 2025-03-25
@@ -2581,6 +2607,14 @@ models:
     release_date: 2025-05-07
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: mistralai/mistral-medium-3.1
+    display_name: Mistral Medium 3.1
+    description: Mistral Medium 3.1 is a language model that is intended to to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2025-05-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mistral-large-2402
     display_name: Mistral Large (2402)
     description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -3598,6 +3632,14 @@ models:
     release_date: 2025-04-29
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: qwen/qwen3-next-80b-a3b-thinking
+    display_name: Qwen3-Next 80B A3B Thinking
+    description: Qwen3-Next is a new model architecture for improving training and inference efficiency under long-context and large-parameter settings. Compared to the MoE structure of Qwen3, Qwen3-Next introduces a hybrid attention mechanism, a highly sparse Mixture-of-Experts (MoE) structure, training-stability-friendly optimizations, and a multi-token prediction mechanism for faster inference. ([blog](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: qwen/qwen3-235b-a22b-instruct-2507-fp8
     display_name: Qwen3 235B A22B Instruct 2507 FP8
     description: Qwen3 235B A22B Instruct 2507 FP8 is an updated version of the non-thinking mode of Qwen3 235B A22B FP8.
@@ -3949,7 +3991,190 @@ models:
     release_date: 2023-05-25
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: tiiuae/falcon3-1b-instruct
+    display_name: Falcon3-1B-Instruct
+    description: Falcon3-1B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 1670000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: tiiuae/falcon3-3b-instruct
+    display_name: Falcon3-3B-Instruct
+    description: Falcon3-3B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 3230000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: tiiuae/falcon3-7b-instruct
+    display_name: Falcon3-7B-Instruct
+    description: Falcon3-7B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 7460000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: tiiuae/falcon3-10b-instruct
+    display_name: Falcon3-10B-Instruct
+    description: Falcon3-10B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 10300000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # AceGPT-v2
+  - name: freedomintelligence/acegpt-v2-8b-chat
+    display_name: AceGPT-v2-8B-Chat
+    description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-8B-Chat is based on Meta-Llama-3-8B. ([paper](https://arxiv.org/abs/2412.12310))
+    creator_organization_name: FreedomAI
+    access: open
+    num_parameters: 8030000000
+    release_date: 2024-10-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: freedomintelligence/acegpt-v2-32b-chat
+    display_name: AceGPT-v2-32B-Chat
+    description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-32B-Chat is based on Qwen1.5-32B. ([paper](https://arxiv.org/abs/2412.12310))
+    creator_organization_name: FreedomAI
+    access: open
+    num_parameters: 32500000000
+    release_date: 2024-10-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: freedomintelligence/acegpt-v2-70b-chat
+    display_name: AceGPT-v2-70B-Chat
+    description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-70B-Chat is based on Meta-Llama-3-70B. ([paper](https://arxiv.org/abs/2412.12310))
+    creator_organization_name: FreedomAI
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-10-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # ALLaM
+  - name: allam-ai/allam-7b-instruct-preview
+    display_name: ALLaM-7B-Instruct-preview
+    description: ALLaM-7B-Instruct-preview is a model designed to advance Arabic language technology, which used a recipe of training on 4T English tokens followed by training on 1.2T mixed Arabic/English tokens. ([paper](https://arxiv.org/abs/2407.15390v1))
+    creator_organization_name: NCAI & SDAIA
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-07-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # SILMA
+  - name: silma-ai/silma-9b-instruct-v1.0
+    display_name: SILMA 9B
+    description: SILMA 9B is a compact Arabic language model based on Google Gemma. ([model card](https://huggingface.co/silma-ai/SILMA-9B-Instruct-v1.0))
+    creator_organization_name: SILMA AI
+    access: open
+    num_parameters: 9240000000
+    release_date: 2024-08-17
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Jais Family
+
+  - name: inceptionai/jais-family-590m-chat
+    display_name: Jais-family-590m-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 771000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-1p3b-chat
+    display_name: Jais-family-1p3b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 1560000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-2p7b-chat
+    display_name: Jais-family-2p7b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 2950000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-6p7b-chat
+    display_name: Jais-family-6p7b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7140000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-6p7b-chat
+    display_name: Jais-family-6p7b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7140000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-13b-chat
+    display_name: Jais-family-13b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 13500000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-30b-8k-chat
+    display_name: Jais-family-30b-8k-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 30800000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-30b-16k-chat
+    display_name: Jais-family-30b-16k-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 30800000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-7b-chat
+    display_name: Jais-adapted-7b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-13b-chat
+    display_name: Jais-adapted-13b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 13300000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-70b-chat
+    display_name: Jais-adapted-70b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 69500000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Together
   - name: together/gpt-jt-6b-v1
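Every model entry touched above follows the same metadata schema, which makes the additions straightforward to scan. As a reading aid only, here is a minimal sketch of that shape; the placeholder values are drawn from entries in this diff rather than from any single new entry:

  - name: <creator-organization>/<model-id>   # e.g. anthropic/claude-sonnet-4-5-20250929
    display_name: <human-readable name>
    description: <one- or two-sentence summary, usually with model card or blog links>
    creator_organization_name: <organization>
    access: open                    # "open" for open-weights models, "limited" for API-gated ones
    num_parameters: 70600000000     # optional; the API-gated entries above omit it
    release_date: 2025-09-29        # YYYY-MM-DD
    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]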
helm/config/tokenizer_configs.yaml
CHANGED
@@ -460,7 +460,7 @@ tokenizer_configs:
 
   # Allen Institute for AI
   # The allenai/olmo-7b requires Python 3.9 or newer.
-  # To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first.
+  # To use the allenai/olmo-7b tokenizer, run `pip install "crfm-helm[allenai]"` first.
   - name: allenai/olmo-7b
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -717,6 +717,12 @@ tokenizer_configs:
     end_of_text_token: "<|im_end|>"
     prefix_token: ""
 
+  - name: qwen/qwen3-next-80b-a3b-thinking
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|im_end|>"
+    prefix_token: ""
+
   - name: qwen/qwq-32b-preview
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -797,6 +803,12 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  - name: tiiuae/falcon3-1b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
   # TsinghuaKEG
   - name: TsinghuaKEG/ice
     tokenizer_spec:
@@ -1075,8 +1087,6 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
-
-
   # DeepSeek-R1-Distill-Llama-3.1-8b
   - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
     tokenizer_spec:
@@ -1086,6 +1096,20 @@ tokenizer_configs:
     end_of_text_token: "<|end▁of▁sentence|>"
     prefix_token: "<|begin▁of▁sentence|>"
 
+  # DeepSeek-R1-Distill-Llama-3.1-8b
+  - name: deepseek-ai/deepseek-r1-distill-llama-70b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end▁of▁sentence|>"
+    prefix_token: "<|begin▁of▁sentence|>"
+
+  # DeepSeek-R1-Distill-Qwen-14B
+  - name: deepseek-ai/deepseek-r1-distill-qwen-14b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end▁of▁sentence|>"
+    prefix_token: "<|begin▁of▁sentence|>"
+
   # deepseek-ai/deepseek-coder-6.7b-instruct
   - name: deepseek-ai/deepseek-coder-6.7b-instruct
     tokenizer_spec:
@@ -1095,7 +1119,6 @@ tokenizer_configs:
     end_of_text_token: "<|end▁of▁sentence|>"
     prefix_token: "<|begin▁of▁sentence|>"
 
-
   # vilm/vinallama-2.7b-chat
   - name: vilm/vinallama-2.7b-chat
     tokenizer_spec:
@@ -1203,3 +1226,50 @@ tokenizer_configs:
       pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m
     end_of_text_token: "</s>"
     prefix_token: "<s>"
+
+  # AceGPT-v2
+  - name: freedomintelligence/acegpt-v2-8b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end_of_text|>"
+    prefix_token: "<|begin_of_text|>"
+
+  - name: freedomintelligence/acegpt-v2-32b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  - name: freedomintelligence/acegpt-v2-70b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end_of_text|>"
+    prefix_token: "<|begin_of_text|>"
+
+  # ALLaM
+  - name: allam-ai/allam-7b-instruct-preview
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  # SILMA
+  - name: silma-ai/silma-9b-instruct-v1.0
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
+
+  # Jais Family
+  - name: inceptionai/jais-family-590m-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  # Jais Adapted
+  - name: inceptionai/jais-adapted-7b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"