crfm-helm 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of crfm-helm has been flagged as potentially problematic.

Files changed (121)
  1. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +3 -1
  2. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +117 -115
  3. helm/benchmark/adaptation/adapter_spec.py +5 -0
  4. helm/benchmark/metrics/bbq_metrics.py +12 -0
  5. helm/benchmark/metrics/evaluate_reference_metrics.py +12 -0
  6. helm/benchmark/metrics/safety_metrics.py +13 -1
  7. helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
  8. helm/benchmark/presentation/run_display.py +13 -3
  9. helm/benchmark/presentation/run_entry.py +2 -2
  10. helm/benchmark/run.py +1 -1
  11. helm/benchmark/run_specs/arabic_run_specs.py +6 -0
  12. helm/benchmark/run_specs/medhelm_run_specs.py +2 -2
  13. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
  14. helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
  15. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
  16. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
  17. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
  18. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
  19. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
  20. helm/benchmark/scenarios/banking77_scenario.py +21 -0
  21. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  22. helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
  23. helm/benchmark/scenarios/commonsense_scenario.py +7 -1
  24. helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
  25. helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
  26. helm/benchmark/scenarios/financebench_scenario.py +21 -0
  27. helm/benchmark/scenarios/gsm_scenario.py +9 -3
  28. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
  29. helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
  30. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
  31. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
  32. helm/benchmark/scenarios/legalbench_scenario.py +6 -7
  33. helm/benchmark/scenarios/math_scenario.py +11 -4
  34. helm/benchmark/scenarios/med_qa_scenario.py +7 -1
  35. helm/benchmark/scenarios/medi_qa_scenario.py +2 -2
  36. helm/benchmark/scenarios/mmlu_scenario.py +8 -2
  37. helm/benchmark/scenarios/narrativeqa_scenario.py +3 -4
  38. helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
  39. helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
  40. helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
  41. helm/benchmark/scenarios/spider_scenario.py +18 -0
  42. helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
  43. helm/benchmark/scenarios/wmt_14_scenario.py +9 -2
  44. helm/benchmark/static/schema_long_context.yaml +12 -31
  45. helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
  46. helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
  47. helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
  48. helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
  49. helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
  50. helm/benchmark/static_build/index.html +5 -6
  51. helm/clients/ai21_client.py +2 -0
  52. helm/clients/aleph_alpha_client.py +2 -0
  53. helm/clients/anthropic_client.py +7 -1
  54. helm/clients/audio_language/diva_llama_client.py +2 -0
  55. helm/clients/audio_language/llama_omni_client.py +2 -1
  56. helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
  57. helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
  58. helm/clients/audio_language/qwen_audiolm_client.py +2 -1
  59. helm/clients/bedrock_client.py +2 -0
  60. helm/clients/cohere_client.py +3 -0
  61. helm/clients/google_client.py +2 -0
  62. helm/clients/http_model_client.py +2 -0
  63. helm/clients/huggingface_client.py +2 -1
  64. helm/clients/ibm_client.py +3 -1
  65. helm/clients/image_generation/adobe_vision_client.py +2 -0
  66. helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
  67. helm/clients/image_generation/cogview2_client.py +2 -1
  68. helm/clients/image_generation/dalle2_client.py +2 -0
  69. helm/clients/image_generation/dalle_mini_client.py +2 -1
  70. helm/clients/image_generation/deep_floyd_client.py +2 -0
  71. helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
  72. helm/clients/image_generation/lexica_client.py +2 -0
  73. helm/clients/image_generation/mindalle_client.py +2 -1
  74. helm/clients/image_generation/together_image_generation_client.py +2 -0
  75. helm/clients/megatron_client.py +2 -0
  76. helm/clients/mistral_client.py +2 -0
  77. helm/clients/moderation_api_client.py +2 -0
  78. helm/clients/openai_client.py +5 -1
  79. helm/clients/palmyra_client.py +2 -1
  80. helm/clients/reka_client.py +2 -1
  81. helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
  82. helm/clients/stanfordhealthcare_http_model_client.py +2 -0
  83. helm/clients/together_client.py +4 -0
  84. helm/clients/vertexai_client.py +4 -0
  85. helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
  86. helm/clients/vision_language/huggingface_vlm_client.py +2 -0
  87. helm/clients/vision_language/idefics_client.py +2 -1
  88. helm/clients/vision_language/open_flamingo_client.py +2 -1
  89. helm/clients/vision_language/paligemma_client.py +2 -1
  90. helm/clients/vision_language/palmyra_vision_client.py +2 -0
  91. helm/clients/vision_language/qwen2_vlm_client.py +2 -1
  92. helm/clients/vision_language/qwen_vlm_client.py +2 -1
  93. helm/clients/writer_client.py +2 -0
  94. helm/common/hierarchical_logger.py +20 -0
  95. helm/common/optional_dependencies.py +1 -1
  96. helm/common/test_general.py +4 -0
  97. helm/config/model_deployments.yaml +225 -0
  98. helm/config/model_metadata.yaml +232 -7
  99. helm/config/tokenizer_configs.yaml +74 -4
  100. helm/benchmark/static_build/assets/index-671a5e06.js +0 -10
  101. helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
  102. helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
  103. helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
  104. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
  105. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
  106. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
  107. {crfm_helm-0.5.8.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
  108. /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
  109. /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
  110. /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
  111. /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
  112. /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
  113. /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
  114. /helm/benchmark/static_build/assets/{index-9352595e.css → index-oIeiQW2g.css} +0 -0
  115. /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
  116. /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
  117. /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
  118. /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
  119. /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
  120. /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
  121. /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
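
The file-level change list above can be reproduced locally by comparing the two wheels, since a wheel is just a zip archive. A minimal sketch (the wheel file names follow the standard naming convention and are assumed to be in the working directory; this lists only added and removed paths, not content changes):

```python
import zipfile

# Wheels are zip archives; comparing their member lists gives a coarse
# "files changed" view (added/removed paths only, not per-file diffs).
old_paths = set(zipfile.ZipFile("crfm_helm-0.5.8-py3-none-any.whl").namelist())
new_paths = set(zipfile.ZipFile("crfm_helm-0.5.9-py3-none-any.whl").namelist())

for path in sorted(new_paths - old_paths):
    print("added:  ", path)
for path in sorted(old_paths - new_paths):
    print("removed:", path)
```

Files present in both archives (such as the two YAML configuration files whose hunks are shown below) would additionally need a content comparison, for example with difflib.unified_diff on their decoded text.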
helm/config/model_metadata.yaml

@@ -278,7 +278,7 @@ models:
  # https://aws.amazon.com/ai/generative-ai/nova/
  - name: amazon/nova-premier-v1:0
  display_name: Amazon Nova Premier
- description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
+ description: Amazon Nova Premier is a capable multimodal foundation model and teacher for model distillation that processes text, images, and videos with a one-million token context window. ([model card](https://www.amazon.science/publications/amazon-nova-premier-technical-report-and-model-card), [blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
  creator_organization_name: Amazon
  access: limited
  release_date: 2025-04-30
@@ -286,7 +286,7 @@ models:

  - name: amazon/nova-pro-v1:0
  display_name: Amazon Nova Pro
- description: Amazon Nova Pro Model
+ description: Amazon Nova Pro is a highly capable multimodal model that balances of accuracy, speed, and cost for a wide range of tasks ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
  creator_organization_name: Amazon
  access: limited
  release_date: 2024-12-03
@@ -294,7 +294,7 @@ models:

  - name: amazon/nova-lite-v1:0
  display_name: Amazon Nova Lite
- description: Amazon Nova Lite Model
+ description: Amazon Nova Lite is a low-cost multimodal model that is fast for processing images, video, documents and text. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
  creator_organization_name: Amazon
  access: limited
  release_date: 2024-12-03
@@ -302,7 +302,7 @@ models:

  - name: amazon/nova-micro-v1:0
  display_name: Amazon Nova Micro
- description: Amazon Nova Micro Model
+ description: Amazon Nova Micro is a text-only model that delivers low-latency responses at low cost. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
  creator_organization_name: Amazon
  access: limited
  release_date: 2024-12-03
@@ -555,6 +555,14 @@ models:
  release_date: 2025-05-14
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: anthropic/claude-sonnet-4-5-20250929
+ display_name: Claude 4.5 Sonnet (20250929)
+ description: Claude 4.5 Sonnet is a model from Anthropic that shows particular strengths in software coding, in agentic tasks where it runs in a loop and uses tools, and in using computers. ([blog](https://www.anthropic.com/news/claude-sonnet-4-5), [system card](https://assets.anthropic.com/m/12f214efcc2f457a/original/Claude-Sonnet-4-5-System-Card.pdf))
+ creator_organization_name: Anthropic
+ access: limited
+ release_date: 2025-09-29
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: anthropic/stanford-online-all-v4-s3
  display_name: Anthropic-LM v4-s3 (52B)
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -946,6 +954,24 @@ models:
  release_date: 2025-01-20
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: deepseek-ai/deepseek-r1-distill-llama-70b
+ display_name: DeepSeek-R1-Distill-Llama-70B
+ description: DeepSeek-R1-Distill-Llama-70B is a fine-tuned open-source models based on Llama-3.3-70B-Instruct using samples generated by DeepSeek-R1.
+ creator_organization_name: DeepSeek
+ access: open
+ num_parameters: 70600000000
+ release_date: 2025-01-20
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: deepseek-ai/deepseek-r1-distill-qwen-14b
+ display_name: DeepSeek-R1-Distill-Qwen-14B
+ description: DeepSeek-R1-Distill-Qwen-14B is a fine-tuned open-source models based on Qwen2.5-14B using samples generated by DeepSeek-R1.
+ creator_organization_name: DeepSeek
+ access: open
+ num_parameters: 14800000000
+ release_date: 2025-01-20
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: deepseek-ai/deepseek-coder-6.7b-instruct
  display_name: DeepSeek-Coder-6.7b-Instruct
  description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
@@ -1207,7 +1233,7 @@ models:

  - name: google/gemini-2.0-flash-001
  display_name: Gemini 2.0 Flash
- description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+ description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
  creator_organization_name: Google
  access: limited
  release_date: 2025-02-01
@@ -1215,7 +1241,7 @@ models:

  - name: google/gemini-2.0-flash-lite-preview-02-05
  display_name: Gemini 2.0 Flash Lite (02-05 preview)
- description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+ description: Gemini 2.0 Flash Lite (02-05 preview) ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
  creator_organization_name: Google
  access: limited
  release_date: 2025-02-05
@@ -1223,7 +1249,7 @@ models:

  - name: google/gemini-2.0-flash-lite-001
  display_name: Gemini 2.0 Flash Lite
- description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+ description: Gemini 2.0 Flash Lite is the fastest and most cost efficient Flash model in the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
  creator_organization_name: Google
  access: limited
  release_date: 2025-03-25
@@ -2581,6 +2607,14 @@ models:
  release_date: 2025-05-07
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: mistralai/mistral-medium-3.1
+ display_name: Mistral Medium 3.1
+ description: Mistral Medium 3.1 is a language model that is intended to to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
+ creator_organization_name: Mistral AI
+ access: limited
+ release_date: 2025-05-07
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: mistralai/mistral-large-2402
  display_name: Mistral Large (2402)
  description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -3598,6 +3632,14 @@ models:
  release_date: 2025-04-29
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: qwen/qwen3-next-80b-a3b-thinking
+ display_name: Qwen3-Next 80B A3B Thinking
+ description: Qwen3-Next is a new model architecture for improving training and inference efficiency under long-context and large-parameter settings. Compared to the MoE structure of Qwen3, Qwen3-Next introduces a hybrid attention mechanism, a highly sparse Mixture-of-Experts (MoE) structure, training-stability-friendly optimizations, and a multi-token prediction mechanism for faster inference. ([blog](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: qwen/qwen3-235b-a22b-instruct-2507-fp8
  display_name: Qwen3 235B A22B Instruct 2507 FP8
  description: Qwen3 235B A22B Instruct 2507 FP8 is an updated version of the non-thinking mode of Qwen3 235B A22B FP8.
@@ -3949,7 +3991,190 @@ models:
  release_date: 2023-05-25
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+ - name: tiiuae/falcon3-1b-instruct
+ display_name: Falcon3-1B-Instruct
+ description: Falcon3-1B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 1670000000
+ release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: tiiuae/falcon3-3b-instruct
+ display_name: Falcon3-3B-Instruct
+ description: Falcon3-3B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 3230000000
+ release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: tiiuae/falcon3-7b-instruct
+ display_name: Falcon3-7B-Instruct
+ description: Falcon3-7B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 7460000000
+ release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: tiiuae/falcon3-10b-instruct
+ display_name: Falcon3-10B-Instruct
+ description: Falcon3-10B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 10300000000
+ release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # AceGPT-v2
+ - name: freedomintelligence/acegpt-v2-8b-chat
+ display_name: AceGPT-v2-8B-Chat
+ description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-8B-Chat is based on Meta-Llama-3-8B. ([paper](https://arxiv.org/abs/2412.12310))
+ creator_organization_name: FreedomAI
+ access: open
+ num_parameters: 8030000000
+ release_date: 2024-10-20
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: freedomintelligence/acegpt-v2-32b-chat
+ display_name: AceGPT-v2-32B-Chat
+ description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-32B-Chat is based on Qwen1.5-32B. ([paper](https://arxiv.org/abs/2412.12310))
+ creator_organization_name: FreedomAI
+ access: open
+ num_parameters: 32500000000
+ release_date: 2024-10-20
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: freedomintelligence/acegpt-v2-70b-chat
+ display_name: AceGPT-v2-70B-Chat
+ description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-70B-Chat is based on Meta-Llama-3-70B. ([paper](https://arxiv.org/abs/2412.12310))
+ creator_organization_name: FreedomAI
+ access: open
+ num_parameters: 70600000000
+ release_date: 2024-10-20
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # ALLaM
+ - name: allam-ai/allam-7b-instruct-preview
+ display_name: ALLaM-7B-Instruct-preview
+ description: ALLaM-7B-Instruct-preview is a model designed to advance Arabic language technology, which used a recipe of training on 4T English tokens followed by training on 1.2T mixed Arabic/English tokens. ([paper](https://arxiv.org/abs/2407.15390v1))
+ creator_organization_name: NCAI & SDAIA
+ access: open
+ num_parameters: 7000000000
+ release_date: 2024-07-22
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # SILMA
+ - name: silma-ai/silma-9b-instruct-v1.0
+ display_name: SILMA 9B
+ description: SILMA 9B is a compact Arabic language model based on Google Gemma. ([model card](https://huggingface.co/silma-ai/SILMA-9B-Instruct-v1.0))
+ creator_organization_name: SILMA AI
+ access: open
+ num_parameters: 9240000000
+ release_date: 2024-08-17
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # Jais Family
+
+ - name: inceptionai/jais-family-590m-chat
+ display_name: Jais-family-590m-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 771000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-1p3b-chat
+ display_name: Jais-family-1p3b-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 1560000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-2p7b-chat
+ display_name: Jais-family-2p7b-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 2950000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-6p7b-chat
+ display_name: Jais-family-6p7b-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 7140000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-6p7b-chat
+ display_name: Jais-family-6p7b-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 7140000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-13b-chat
+ display_name: Jais-family-13b-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 13500000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-30b-8k-chat
+ display_name: Jais-family-30b-8k-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 30800000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-family-30b-16k-chat
+ display_name: Jais-family-30b-16k-chat
+ description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 30800000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-adapted-7b-chat
+ display_name: Jais-adapted-7b-chat
+ description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-adapted-13b-chat
+ display_name: Jais-adapted-13b-chat
+ description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 13300000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: inceptionai/jais-adapted-70b-chat
+ display_name: Jais-adapted-70b-chat
+ description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+ creator_organization_name: Inception
+ access: open
+ num_parameters: 69500000000
+ release_date: 2023-08-30
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  # Together
  - name: together/gpt-jt-6b-v1
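
The entries added to helm/config/model_metadata.yaml above all share the same flat record shape: name, display_name, description, creator_organization_name, access, release_date, tags, and (for open-weights models) num_parameters. Below is a minimal sketch of parsing and sanity-checking one such record with PyYAML; the field set is taken from the hunks above, while the validation itself is illustrative and not HELM's own loading code. Note that a model usually also needs a matching entry in helm/config/model_deployments.yaml, which this release grows by 225 lines.

```python
import yaml  # pip install pyyaml

# One record in the shape that the hunks above add to model_metadata.yaml.
ENTRY_TEXT = """
- name: tiiuae/falcon3-7b-instruct
  display_name: Falcon3-7B-Instruct
  creator_organization_name: TII UAE
  access: open
  num_parameters: 7460000000
  release_date: 2024-12-17
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
"""

REQUIRED_FIELDS = {"name", "display_name", "creator_organization_name", "access", "release_date", "tags"}

for record in yaml.safe_load(ENTRY_TEXT):
    missing = REQUIRED_FIELDS - set(record)
    if missing:
        raise ValueError(f"{record.get('name')}: missing fields {sorted(missing)}")
    print(record["name"], "->", record["display_name"], record["tags"])
```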
helm/config/tokenizer_configs.yaml

@@ -460,7 +460,7 @@ tokenizer_configs:

  # Allen Institute for AI
  # The allenai/olmo-7b requires Python 3.9 or newer.
- # To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first.
+ # To use the allenai/olmo-7b tokenizer, run `pip install "crfm-helm[allenai]"` first.
  - name: allenai/olmo-7b
  tokenizer_spec:
  class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -717,6 +717,12 @@ tokenizer_configs:
  end_of_text_token: "<|im_end|>"
  prefix_token: ""

+ - name: qwen/qwen3-next-80b-a3b-thinking
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|im_end|>"
+ prefix_token: ""
+
  - name: qwen/qwq-32b-preview
  tokenizer_spec:
  class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -797,6 +803,12 @@ tokenizer_configs:
  end_of_text_token: "<|endoftext|>"
  prefix_token: ""

+ - name: tiiuae/falcon3-1b-instruct
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|endoftext|>"
+ prefix_token: ""
+
  # TsinghuaKEG
  - name: TsinghuaKEG/ice
  tokenizer_spec:
@@ -1075,8 +1087,6 @@ tokenizer_configs:
  end_of_text_token: "<|endoftext|>"
  prefix_token: ""

-
-
  # DeepSeek-R1-Distill-Llama-3.1-8b
  - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
  tokenizer_spec:
@@ -1086,6 +1096,20 @@ tokenizer_configs:
  end_of_text_token: "<|end▁of▁sentence|>"
  prefix_token: "<|begin▁of▁sentence|>"

+ # DeepSeek-R1-Distill-Llama-3.1-8b
+ - name: deepseek-ai/deepseek-r1-distill-llama-70b
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|end▁of▁sentence|>"
+ prefix_token: "<|begin▁of▁sentence|>"
+
+ # DeepSeek-R1-Distill-Qwen-14B
+ - name: deepseek-ai/deepseek-r1-distill-qwen-14b
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|end▁of▁sentence|>"
+ prefix_token: "<|begin▁of▁sentence|>"
+
  # deepseek-ai/deepseek-coder-6.7b-instruct
  - name: deepseek-ai/deepseek-coder-6.7b-instruct
  tokenizer_spec:
@@ -1095,7 +1119,6 @@ tokenizer_configs:
  end_of_text_token: "<|end▁of▁sentence|>"
  prefix_token: "<|begin▁of▁sentence|>"

-
  # vilm/vinallama-2.7b-chat
  - name: vilm/vinallama-2.7b-chat
  tokenizer_spec:
@@ -1203,3 +1226,50 @@ tokenizer_configs:
  pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m
  end_of_text_token: "</s>"
  prefix_token: "<s>"
+
+ # AceGPT-v2
+ - name: freedomintelligence/acegpt-v2-8b-chat
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|end_of_text|>"
+ prefix_token: "<|begin_of_text|>"
+
+ - name: freedomintelligence/acegpt-v2-32b-chat
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|endoftext|>"
+ prefix_token: ""
+
+ - name: freedomintelligence/acegpt-v2-70b-chat
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|end_of_text|>"
+ prefix_token: "<|begin_of_text|>"
+
+ # ALLaM
+ - name: allam-ai/allam-7b-instruct-preview
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "</s>"
+ prefix_token: "<s>"
+
+ # SILMA
+ - name: silma-ai/silma-9b-instruct-v1.0
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<eos>"
+ prefix_token: "<bos>"
+
+ # Jais Family
+ - name: inceptionai/jais-family-590m-chat
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "<|endoftext|>"
+ prefix_token: "<|endoftext|>"
+
+ # Jais Adapted
+ - name: inceptionai/jais-adapted-7b-chat
+ tokenizer_spec:
+ class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+ end_of_text_token: "</s>"
+ prefix_token: "<s>"
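
The end_of_text_token and prefix_token values in these tokenizer_configs.yaml entries mirror the special tokens of the underlying Hugging Face tokenizers wrapped by helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer. A hedged way to cross-check a couple of the new entries against the upstream tokenizers (assumes the transformers package, network access, and that the Hugging Face repo ids below correspond to the HELM tokenizer names; this is a verification sketch, not part of HELM):

```python
from transformers import AutoTokenizer

# Hugging Face repo ids assumed to back the HELM tokenizer names added above,
# paired with the (end_of_text_token, prefix_token) values from tokenizer_configs.yaml.
EXPECTED = {
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": ("<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"),
    "tiiuae/Falcon3-1B-Instruct": ("<|endoftext|>", ""),
}

for repo_id, (end_of_text, prefix) in EXPECTED.items():
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    # eos_token should line up with end_of_text_token; bos_token (or None) with prefix_token.
    print(repo_id)
    print("  yaml end_of_text_token:", end_of_text, "| tokenizer eos_token:", tokenizer.eos_token)
    print("  yaml prefix_token:     ", repr(prefix), "| tokenizer bos_token:", tokenizer.bos_token)
```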