crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic.
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +7 -77
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +315 -282
- helm/benchmark/adaptation/adapter_spec.py +10 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
- helm/benchmark/annotation/aci_bench_annotator.py +11 -22
- helm/benchmark/annotation/alrage_annotator.py +90 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
- helm/benchmark/annotation/dischargeme_annotator.py +11 -22
- helm/benchmark/annotation/med_dialog_annotator.py +11 -22
- helm/benchmark/annotation/medalign_annotator.py +11 -22
- helm/benchmark/annotation/medi_qa_annotator.py +11 -22
- helm/benchmark/annotation/medication_qa_annotator.py +11 -22
- helm/benchmark/annotation/mental_health_annotator.py +11 -22
- helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
- helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
- helm/benchmark/annotation/model_as_judge.py +23 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
- helm/benchmark/metrics/air_bench_metrics.py +3157 -1
- helm/benchmark/metrics/alrage_metric.py +35 -0
- helm/benchmark/metrics/basic_metrics.py +267 -2
- helm/benchmark/metrics/bbq_metrics.py +12 -0
- helm/benchmark/metrics/classification_metrics.py +19 -1
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
- helm/benchmark/metrics/dry_run_metrics.py +30 -1
- helm/benchmark/metrics/efficiency_metrics.py +74 -0
- helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
- helm/benchmark/metrics/evaluate_reference_metrics.py +311 -0
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
- helm/benchmark/metrics/ifeval_metrics.py +13 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
- helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
- helm/benchmark/metrics/language_modeling_metrics.py +13 -1
- helm/benchmark/metrics/live_qa_metrics.py +13 -1
- helm/benchmark/metrics/llm_jury_metrics.py +13 -1
- helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
- helm/benchmark/metrics/medec_metrics.py +25 -2
- helm/benchmark/metrics/metric.py +25 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
- helm/benchmark/metrics/omni_math_metrics.py +13 -1
- helm/benchmark/metrics/safety_metrics.py +13 -1
- helm/benchmark/metrics/seahelm_metrics.py +14 -1
- helm/benchmark/metrics/summac/model_summac.py +2 -2
- helm/benchmark/metrics/summarization_metrics.py +129 -1
- helm/benchmark/metrics/toxicity_metrics.py +31 -1
- helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
- helm/benchmark/metrics/wildbench_metrics.py +21 -1
- helm/benchmark/presentation/run_display.py +13 -3
- helm/benchmark/presentation/run_entry.py +2 -2
- helm/benchmark/presentation/schema.py +5 -22
- helm/benchmark/presentation/summarize.py +180 -11
- helm/benchmark/presentation/taxonomy_info.py +20 -0
- helm/benchmark/run.py +1 -1
- helm/benchmark/run_expander.py +4 -0
- helm/benchmark/run_specs/arabic_run_specs.py +140 -16
- helm/benchmark/run_specs/bluex_run_specs.py +1 -1
- helm/benchmark/run_specs/classic_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +2 -2
- helm/benchmark/run_specs/medhelm/__init__.py +0 -0
- helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +362 -52
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
- helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
- helm/benchmark/scenarios/air_bench_scenario.py +21 -0
- helm/benchmark/scenarios/alrage_scenario.py +54 -0
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
- helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
- helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
- helm/benchmark/scenarios/aratrust_scenario.py +19 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
- helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
- helm/benchmark/scenarios/banking77_scenario.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +15 -0
- helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
- helm/benchmark/scenarios/bluex_scenario.py +6 -2
- helm/benchmark/scenarios/bold_scenario.py +15 -0
- helm/benchmark/scenarios/boolq_scenario.py +20 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
- helm/benchmark/scenarios/clear_scenario.py +23 -0
- helm/benchmark/scenarios/cleva_scenario.py +479 -0
- helm/benchmark/scenarios/code_scenario.py +28 -0
- helm/benchmark/scenarios/commonsense_scenario.py +32 -0
- helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
- helm/benchmark/scenarios/copyright_scenario.py +35 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
- helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
- helm/benchmark/scenarios/disinformation_scenario.py +22 -0
- helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
- helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
- helm/benchmark/scenarios/financebench_scenario.py +21 -0
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
- helm/benchmark/scenarios/gpqa_scenario.py +18 -0
- helm/benchmark/scenarios/grammar_scenario.py +20 -1
- helm/benchmark/scenarios/gsm_scenario.py +21 -0
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
- helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
- helm/benchmark/scenarios/headqa_scenario.py +22 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
- helm/benchmark/scenarios/ice_scenario.py +21 -1
- helm/benchmark/scenarios/ifeval_scenario.py +18 -0
- helm/benchmark/scenarios/imdb_scenario.py +15 -0
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
- helm/benchmark/scenarios/koala_scenario.py +21 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
- helm/benchmark/scenarios/legal_support_scenario.py +13 -0
- helm/benchmark/scenarios/legalbench_scenario.py +19 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
- helm/benchmark/scenarios/lextreme_scenario.py +11 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
- helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
- helm/benchmark/scenarios/math_scenario.py +33 -0
- helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
- helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
- helm/benchmark/scenarios/med_qa_scenario.py +20 -0
- helm/benchmark/scenarios/medalign_scenario.py +23 -0
- helm/benchmark/scenarios/medbullets_scenario.py +22 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
- helm/benchmark/scenarios/medec_scenario.py +23 -0
- helm/benchmark/scenarios/medhallu_scenario.py +23 -0
- helm/benchmark/scenarios/medhelm/__init__.py +0 -0
- helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
- helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
- helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
- helm/benchmark/scenarios/mental_health_scenario.py +23 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
- helm/benchmark/scenarios/mmlu_scenario.py +21 -0
- helm/benchmark/scenarios/msmarco_scenario.py +30 -0
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
- helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
- helm/benchmark/scenarios/omni_math_scenario.py +18 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
- helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
- helm/benchmark/scenarios/quac_scenario.py +14 -0
- helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
- helm/benchmark/scenarios/raft_scenario.py +15 -0
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
- helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
- helm/benchmark/scenarios/scenario.py +31 -0
- helm/benchmark/scenarios/seahelm_scenario.py +348 -0
- helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
- helm/benchmark/scenarios/situation_prompts.yaml +49 -0
- helm/benchmark/scenarios/spider_scenario.py +18 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
- helm/benchmark/scenarios/summarization_scenario.py +37 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
- helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
- helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
- helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
- helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
- helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
- helm/benchmark/scenarios/the_pile_scenario.py +13 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
- helm/benchmark/scenarios/vicuna_scenario.py +21 -1
- helm/benchmark/scenarios/wikifact_scenario.py +20 -0
- helm/benchmark/scenarios/wildbench_scenario.py +18 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
- helm/benchmark/static/schema_arabic.yaml +55 -12
- helm/benchmark/static/schema_long_context.yaml +11 -30
- helm/benchmark/static/schema_medhelm.yaml +36 -0
- helm/benchmark/static/schema_slp.yaml +219 -0
- helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
- helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
- helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
- helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
- helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
- helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
- helm/benchmark/static_build/index.html +5 -6
- helm/clients/ai21_client.py +2 -0
- helm/clients/aleph_alpha_client.py +2 -0
- helm/clients/anthropic_client.py +7 -1
- helm/clients/audio_language/diva_llama_client.py +2 -0
- helm/clients/audio_language/llama_omni/arguments.py +61 -0
- helm/clients/audio_language/llama_omni/constants.py +9 -0
- helm/clients/audio_language/llama_omni/conversation.py +213 -0
- helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
- helm/clients/audio_language/llama_omni/model/builder.py +88 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
- helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
- helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
- helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
- helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
- helm/clients/audio_language/llama_omni/preprocess.py +295 -0
- helm/clients/audio_language/llama_omni/utils.py +202 -0
- helm/clients/audio_language/llama_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
- helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_audiolm_client.py +2 -1
- helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
- helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
- helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
- helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
- helm/clients/bedrock_client.py +2 -0
- helm/clients/cohere_client.py +3 -0
- helm/clients/google_client.py +2 -0
- helm/clients/http_model_client.py +2 -0
- helm/clients/huggingface_client.py +2 -1
- helm/clients/ibm_client.py +3 -1
- helm/clients/image_generation/adobe_vision_client.py +2 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
- helm/clients/image_generation/cogview2_client.py +2 -1
- helm/clients/image_generation/dalle2_client.py +2 -0
- helm/clients/image_generation/dalle_mini_client.py +2 -1
- helm/clients/image_generation/deep_floyd_client.py +2 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
- helm/clients/image_generation/lexica_client.py +2 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
- helm/clients/image_generation/mindalle_client.py +2 -1
- helm/clients/image_generation/together_image_generation_client.py +2 -0
- helm/clients/megatron_client.py +2 -0
- helm/clients/mistral_client.py +2 -0
- helm/clients/moderation_api_client.py +2 -0
- helm/clients/openai_client.py +36 -20
- helm/clients/openai_responses_client.py +27 -3
- helm/clients/openrouter_client.py +31 -0
- helm/clients/palmyra_client.py +2 -1
- helm/clients/reka_client.py +2 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
- helm/clients/stanfordhealthcare_http_model_client.py +2 -0
- helm/clients/test_openrouter_client.py +69 -0
- helm/clients/together_client.py +52 -11
- helm/clients/vertexai_client.py +12 -2
- helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
- helm/clients/vision_language/huggingface_vlm_client.py +2 -0
- helm/clients/vision_language/idefics_client.py +2 -1
- helm/clients/vision_language/open_flamingo_client.py +2 -1
- helm/clients/vision_language/paligemma_client.py +2 -1
- helm/clients/vision_language/palmyra_vision_client.py +2 -0
- helm/clients/vision_language/qwen2_vlm_client.py +2 -1
- helm/clients/vision_language/qwen_vlm_client.py +2 -1
- helm/clients/writer_client.py +2 -0
- helm/common/hierarchical_logger.py +20 -0
- helm/common/optional_dependencies.py +1 -1
- helm/common/test_general.py +4 -0
- helm/config/model_deployments.yaml +300 -1
- helm/config/model_metadata.yaml +302 -9
- helm/config/tokenizer_configs.yaml +92 -4
- helm/proxy/example_queries.py +8 -8
- helm/proxy/server.py +2 -1
- helm/proxy/static/index.css +4 -0
- helm/proxy/static/index.js +7 -1
- helm/benchmark/metrics/aci_bench_metrics.py +0 -14
- helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
- helm/benchmark/metrics/dischargeme_metrics.py +0 -14
- helm/benchmark/metrics/med_dialog_metrics.py +0 -14
- helm/benchmark/metrics/medalign_metrics.py +0 -14
- helm/benchmark/metrics/medi_qa_metrics.py +0 -14
- helm/benchmark/metrics/medication_qa_metrics.py +0 -14
- helm/benchmark/metrics/mental_health_metrics.py +0 -14
- helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
- helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
- helm/benchmark/static_build/assets/index-b9779128.css +0 -1
- helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
- helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
- helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
- helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
- /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
- /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
- /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
- /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
- /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
- /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
- /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
- /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
helm/config/model_metadata.yaml
CHANGED
@@ -278,7 +278,7 @@ models:
   # https://aws.amazon.com/ai/generative-ai/nova/
   - name: amazon/nova-premier-v1:0
     display_name: Amazon Nova Premier
-    description: Amazon Nova Premier is
+    description: Amazon Nova Premier is a capable multimodal foundation model and teacher for model distillation that processes text, images, and videos with a one-million token context window. ([model card](https://www.amazon.science/publications/amazon-nova-premier-technical-report-and-model-card), [blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
     creator_organization_name: Amazon
     access: limited
     release_date: 2025-04-30
@@ -286,7 +286,7 @@ models:

   - name: amazon/nova-pro-v1:0
     display_name: Amazon Nova Pro
-    description: Amazon Nova Pro
+    description: Amazon Nova Pro is a highly capable multimodal model that balances of accuracy, speed, and cost for a wide range of tasks ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
     creator_organization_name: Amazon
     access: limited
     release_date: 2024-12-03
@@ -294,7 +294,7 @@ models:

   - name: amazon/nova-lite-v1:0
     display_name: Amazon Nova Lite
-    description: Amazon Nova Lite
+    description: Amazon Nova Lite is a low-cost multimodal model that is fast for processing images, video, documents and text. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
     creator_organization_name: Amazon
     access: limited
     release_date: 2024-12-03
@@ -302,7 +302,7 @@ models:

   - name: amazon/nova-micro-v1:0
     display_name: Amazon Nova Micro
-    description: Amazon Nova Micro
+    description: Amazon Nova Micro is a text-only model that delivers low-latency responses at low cost. ([model card](https://www.amazon.science/publications/the-amazon-nova-family-of-models-technical-report-and-model-card))
     creator_organization_name: Amazon
     access: limited
     release_date: 2024-12-03
@@ -555,6 +555,14 @@ models:
     release_date: 2025-05-14
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: anthropic/claude-sonnet-4-5-20250929
+    display_name: Claude 4.5 Sonnet (20250929)
+    description: Claude 4.5 Sonnet is a model from Anthropic that shows particular strengths in software coding, in agentic tasks where it runs in a loop and uses tools, and in using computers. ([blog](https://www.anthropic.com/news/claude-sonnet-4-5), [system card](https://assets.anthropic.com/m/12f214efcc2f457a/original/Claude-Sonnet-4-5-System-Card.pdf))
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-09-29
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: anthropic/stanford-online-all-v4-s3
     display_name: Anthropic-LM v4-s3 (52B)
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -946,6 +954,24 @@ models:
     release_date: 2025-01-20
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: deepseek-ai/deepseek-r1-distill-llama-70b
+    display_name: DeepSeek-R1-Distill-Llama-70B
+    description: DeepSeek-R1-Distill-Llama-70B is a fine-tuned open-source models based on Llama-3.3-70B-Instruct using samples generated by DeepSeek-R1.
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 70600000000
+    release_date: 2025-01-20
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/deepseek-r1-distill-qwen-14b
+    display_name: DeepSeek-R1-Distill-Qwen-14B
+    description: DeepSeek-R1-Distill-Qwen-14B is a fine-tuned open-source models based on Qwen2.5-14B using samples generated by DeepSeek-R1.
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 14800000000
+    release_date: 2025-01-20
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: deepseek-ai/deepseek-coder-6.7b-instruct
     display_name: DeepSeek-Coder-6.7b-Instruct
     description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
@@ -1207,7 +1233,7 @@ models:

   - name: google/gemini-2.0-flash-001
     display_name: Gemini 2.0 Flash
-    description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    description: Gemini 2.0 Flash is a member of the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
     creator_organization_name: Google
     access: limited
     release_date: 2025-02-01
@@ -1215,7 +1241,7 @@ models:

   - name: google/gemini-2.0-flash-lite-preview-02-05
     display_name: Gemini 2.0 Flash Lite (02-05 preview)
-    description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    description: Gemini 2.0 Flash Lite (02-05 preview) ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
     creator_organization_name: Google
     access: limited
     release_date: 2025-02-05
@@ -1223,7 +1249,7 @@ models:

   - name: google/gemini-2.0-flash-lite-001
     display_name: Gemini 2.0 Flash Lite
-    description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    description: Gemini 2.0 Flash Lite is the fastest and most cost efficient Flash model in the Gemini 2.0 series of models, a suite of highly-capable, natively multimodal models designed to power agentic systems. ([model card](https://storage.googleapis.com/model-cards/documents/gemini-2-flash.pdf), [documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
     creator_organization_name: Google
     access: limited
     release_date: 2025-03-25
@@ -1253,6 +1279,14 @@ models:
     release_date: 2025-06-17
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: google/gemini-2.5-flash-lite
+    display_name: Gemini 2.5 Flash-Lite
+    description: Gemini 2.5 Flash-Lite ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-07-22
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/gemini-2.5-flash-preview-04-17
     display_name: Gemini 2.5 Flash (04-17 preview)
     description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
@@ -2573,6 +2607,14 @@ models:
     release_date: 2025-05-07
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: mistralai/mistral-medium-3.1
+    display_name: Mistral Medium 3.1
+    description: Mistral Medium 3.1 is a language model that is intended to to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2025-05-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mistral-large-2402
     display_name: Mistral Large (2402)
     description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -3052,6 +3094,30 @@ models:
     release_date: 2025-04-14
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: openai/gpt-5-2025-08-07
+    display_name: GPT-5 (2025-08-07)
+    description: GPT-5 (2025-08-07) is a multimdodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-08-07
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-5-mini-2025-08-07
+    display_name: GPT-5 mini (2025-08-07)
+    description: GPT-5 mini (2025-08-07) is a multimdodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-08-07
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-5-nano-2025-08-07
+    display_name: GPT-5 nano (2025-08-07)
+    description: GPT-5 nano (2025-08-07) is a multimdodal model trained for real-world coding tasks and long-running agentic tasks. ([blog](https://openai.com/index/introducing-gpt-5-for-developers/), [system card](https://cdn.openai.com/pdf/8124a3ce-ab78-4f06-96eb-49ea29ffb52f/gpt5-system-card-aug7.pdf))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2025-08-07
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: openai/whisper-1_gpt-4o-2024-11-20
     display_name: Whisper-1 + GPT-4o (2024-11-20)
     description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
@@ -3273,6 +3339,23 @@ models:
     release_date: 2025-06-10
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  ## GPT-OSS
+  - name: openai/gpt-oss-20b
+    display_name: gpt-oss-20b
+    description: gpt-oss-20b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 3.6B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/))
+    creator_organization_name: OpenAI
+    access: open
+    release_date: 2025-08-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-oss-120b
+    display_name: gpt-oss-120b
+    description: gpt-oss-120b is an open-weight language model that was trained using a mix of reinforcement learning and other techniques informed by OpenAI's internal models. It uses a mixture-of-experts architecture and activates 5.1B parameters per token. ([blog](https://openai.com/index/introducing-gpt-oss/))
+    creator_organization_name: OpenAI
+    access: open
+    release_date: 2025-08-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   ## Codex Models
   # DEPRECATED: Codex models have been shut down on March 23 2023.

@@ -3549,6 +3632,22 @@ models:
     release_date: 2025-04-29
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: qwen/qwen3-next-80b-a3b-thinking
+    display_name: Qwen3-Next 80B A3B Thinking
+    description: Qwen3-Next is a new model architecture for improving training and inference efficiency under long-context and large-parameter settings. Compared to the MoE structure of Qwen3, Qwen3-Next introduces a hybrid attention mechanism, a highly sparse Mixture-of-Experts (MoE) structure, training-stability-friendly optimizations, and a multi-token prediction mechanism for faster inference. ([blog](https://qwen.ai/blog?id=4074cca80393150c248e508aa62983f9cb7d27cd&from=research.latest-advancements-list))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen3-235b-a22b-instruct-2507-fp8
+    display_name: Qwen3 235B A22B Instruct 2507 FP8
+    description: Qwen3 235B A22B Instruct 2507 FP8 is an updated version of the non-thinking mode of Qwen3 235B A22B FP8.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2025-07-21 # https://x.com/Alibaba_Qwen/status/1947344511988076547
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: qwen/qwq-32b-preview
     display_name: QwQ (32B Preview)
     description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
@@ -3892,7 +3991,190 @@ models:
     release_date: 2023-05-25
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+  - name: tiiuae/falcon3-1b-instruct
+    display_name: Falcon3-1B-Instruct
+    description: Falcon3-1B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 1670000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: tiiuae/falcon3-3b-instruct
+    display_name: Falcon3-3B-Instruct
+    description: Falcon3-3B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 3230000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: tiiuae/falcon3-7b-instruct
+    display_name: Falcon3-7B-Instruct
+    description: Falcon3-7B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 7460000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: tiiuae/falcon3-10b-instruct
+    display_name: Falcon3-10B-Instruct
+    description: Falcon3-10B-Instruct is an open-weights foundation model that supports 4 languages (English, French, Spanish, Portuguese) that was trained on 14T tokens.
+    creator_organization_name: TII UAE
+    access: open
+    num_parameters: 10300000000
+    release_date: 2024-12-17 # https://huggingface.co/docs/transformers/main/en/model_doc/falcon3
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # AceGPT-v2
+  - name: freedomintelligence/acegpt-v2-8b-chat
+    display_name: AceGPT-v2-8B-Chat
+    description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-8B-Chat is based on Meta-Llama-3-8B. ([paper](https://arxiv.org/abs/2412.12310))
+    creator_organization_name: FreedomAI
+    access: open
+    num_parameters: 8030000000
+    release_date: 2024-10-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: freedomintelligence/acegpt-v2-32b-chat
+    display_name: AceGPT-v2-32B-Chat
+    description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-32B-Chat is based on Qwen1.5-32B. ([paper](https://arxiv.org/abs/2412.12310))
+    creator_organization_name: FreedomAI
+    access: open
+    num_parameters: 32500000000
+    release_date: 2024-10-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: freedomintelligence/acegpt-v2-70b-chat
+    display_name: AceGPT-v2-70B-Chat
+    description: AceGPT is a fully fine-tuned generative text model collection, particularly focused on the Arabic language domain. AceGPT-v2-70B-Chat is based on Meta-Llama-3-70B. ([paper](https://arxiv.org/abs/2412.12310))
+    creator_organization_name: FreedomAI
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-10-20
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # ALLaM
+  - name: allam-ai/allam-7b-instruct-preview
+    display_name: ALLaM-7B-Instruct-preview
+    description: ALLaM-7B-Instruct-preview is a model designed to advance Arabic language technology, which used a recipe of training on 4T English tokens followed by training on 1.2T mixed Arabic/English tokens. ([paper](https://arxiv.org/abs/2407.15390v1))
+    creator_organization_name: NCAI & SDAIA
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-07-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # SILMA
+  - name: silma-ai/silma-9b-instruct-v1.0
+    display_name: SILMA 9B
+    description: SILMA 9B is a compact Arabic language model based on Google Gemma. ([model card](https://huggingface.co/silma-ai/SILMA-9B-Instruct-v1.0))
+    creator_organization_name: SILMA AI
+    access: open
+    num_parameters: 9240000000
+    release_date: 2024-08-17
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # Jais Family
+
+  - name: inceptionai/jais-family-590m-chat
+    display_name: Jais-family-590m-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 771000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-1p3b-chat
+    display_name: Jais-family-1p3b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 1560000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-2p7b-chat
+    display_name: Jais-family-2p7b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 2950000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-6p7b-chat
+    display_name: Jais-family-6p7b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7140000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-6p7b-chat
+    display_name: Jais-family-6p7b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7140000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-13b-chat
+    display_name: Jais-family-13b-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 13500000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: inceptionai/jais-family-30b-8k-chat
+    display_name: Jais-family-30b-8k-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 30800000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-family-30b-16k-chat
+    display_name: Jais-family-30b-16k-chat
+    description: The Jais family of models is a series of bilingual English-Arabic large language models (LLMs) that are trained from scratch and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 30800000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-7b-chat
+    display_name: Jais-adapted-7b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-13b-chat
+    display_name: Jais-adapted-13b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 13300000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: inceptionai/jais-adapted-70b-chat
+    display_name: Jais-adapted-70b-chat
+    description: The Jais adapted models are bilingual English-Arabic large language models (LLMs) that are trained adaptively from Llama-2 and optimized to excel in Arabic while having strong English capabilities. ([website](https://inceptionai.ai/jaisfamily/index.html), [blog](https://mbzuai.ac.ae/news/meet-jais-the-worlds-most-advanced-arabic-large-language-model-open-sourced-by-g42s-inception/))
+    creator_organization_name: Inception
+    access: open
+    num_parameters: 69500000000
+    release_date: 2023-08-30
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   # Together
   - name: together/gpt-jt-6b-v1
@@ -4315,6 +4597,17 @@ models:
     release_date: 2025-05-08
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  # Z.ai
+
+  - name: zai-org/glm-4.5-air-fp8
+    display_name: GLM-4.5-Air-FP8
+    description: GLM-4.5-Air-FP8 is a hybrid reasoning model designed to unify reasoning, coding, and agentic capabilities into a single model. It has 106 billion total parameters and 12 billion active parameters. The thinking mode is enabled by default. ([blog](https://z.ai/blog/glm-4.5))
+    creator_organization_name: Z.ai
+    access: open
+    num_parameters: 110000000000
+    release_date: 2025-07-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+

   # Granite - IBM
   # https://www.ibm.com/granite
@@ -4530,7 +4823,7 @@ models:

   - name: ibm/granite-3.3-8b-instruct
     display_name: IBM Granite 3.3 8B Instruct
-    description: IBM Granite 3.3 8B Instruct is
+    description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
     creator_organization_name: IBM
     access: open
     num_parameters: 8170000000
@@ -4539,7 +4832,7 @@ models:

   - name: ibm/granite-3.3-8b-instruct-with-guardian
     display_name: IBM Granite 3.3 8B Instruct (with guardian)
-    description: IBM Granite 3.3 8B Instruct is
+    description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. All prompts were first evaluated for risk by [IBM Granite Guardian 3.2 5B](https://www.ibm.com/granite/docs/models/guardian/) and prompts that were deemed risky (with a risk threshold of 0.8) received the response "I'm very sorry, but I can't assist with that.". ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
     creator_organization_name: IBM
     access: open
     num_parameters: 8170000000
helm/config/tokenizer_configs.yaml
CHANGED
@@ -460,7 +460,7 @@ tokenizer_configs:

   # Allen Institute for AI
   # The allenai/olmo-7b requires Python 3.9 or newer.
-  # To use the allenai/olmo-7b tokenizer, run `pip install crfm-helm[allenai]` first.
+  # To use the allenai/olmo-7b tokenizer, run `pip install "crfm-helm[allenai]"` first.
   - name: allenai/olmo-7b
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -650,6 +650,12 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"

+  - name: openai/o200k_harmony
+    tokenizer_spec:
+      class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+
   - name: openai/clip-vit-large-patch14
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -705,6 +711,18 @@ tokenizer_configs:
     end_of_text_token: "<|im_end|>"
     prefix_token: "<|im_start|>"

+  - name: qwen/qwen3-235b-a22b-instruct-2507-fp8
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|im_end|>"
+    prefix_token: ""
+
+  - name: qwen/qwen3-next-80b-a3b-thinking
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|im_end|>"
+    prefix_token: ""
+
   - name: qwen/qwq-32b-preview
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -785,6 +803,12 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""

+  - name: tiiuae/falcon3-1b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
   # TsinghuaKEG
   - name: TsinghuaKEG/ice
     tokenizer_spec:
@@ -1048,7 +1072,6 @@ tokenizer_configs:
     end_of_text_token: ""

   # IBM Granite 3.3
-
   - name: ibm/granite-3.3-8b-instruct
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
@@ -1057,7 +1080,12 @@ tokenizer_configs:
     end_of_text_token: "<|end_of_text|>"
     prefix_token: "<|end_of_text|>"

-
+  # Z.ai GLM-4.5-AIR-FP8
+  - name: zai-org/glm-4.5-air-fp8
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""

   # DeepSeek-R1-Distill-Llama-3.1-8b
   - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
@@ -1068,6 +1096,20 @@ tokenizer_configs:
     end_of_text_token: "<|end▁of▁sentence|>"
     prefix_token: "<|begin▁of▁sentence|>"

+  # DeepSeek-R1-Distill-Llama-3.1-8b
+  - name: deepseek-ai/deepseek-r1-distill-llama-70b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end▁of▁sentence|>"
+    prefix_token: "<|begin▁of▁sentence|>"
+
+  # DeepSeek-R1-Distill-Qwen-14B
+  - name: deepseek-ai/deepseek-r1-distill-qwen-14b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end▁of▁sentence|>"
+    prefix_token: "<|begin▁of▁sentence|>"
+
   # deepseek-ai/deepseek-coder-6.7b-instruct
   - name: deepseek-ai/deepseek-coder-6.7b-instruct
     tokenizer_spec:
@@ -1077,7 +1119,6 @@ tokenizer_configs:
     end_of_text_token: "<|end▁of▁sentence|>"
     prefix_token: "<|begin▁of▁sentence|>"

-
   # vilm/vinallama-2.7b-chat
   - name: vilm/vinallama-2.7b-chat
     tokenizer_spec:
@@ -1185,3 +1226,50 @@ tokenizer_configs:
         pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m
     end_of_text_token: "</s>"
     prefix_token: "<s>"
+
+  # AceGPT-v2
+  - name: freedomintelligence/acegpt-v2-8b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end_of_text|>"
+    prefix_token: "<|begin_of_text|>"
+
+  - name: freedomintelligence/acegpt-v2-32b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+  - name: freedomintelligence/acegpt-v2-70b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|end_of_text|>"
+    prefix_token: "<|begin_of_text|>"
+
+  # ALLaM
+  - name: allam-ai/allam-7b-instruct-preview
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  # SILMA
+  - name: silma-ai/silma-9b-instruct-v1.0
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
+
+  # Jais Family
+  - name: inceptionai/jais-family-590m-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  # Jais Adapted
+  - name: inceptionai/jais-adapted-7b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
helm/proxy/example_queries.py
CHANGED
@@ -21,7 +21,7 @@ example_queries = [
             """
             temperature: 0.5 # Medium amount of randomness
             stop_sequences: [.] # Stop when you hit a period
-            model: openai/gpt-
+            model: openai/gpt-4.1-nano-2025-04-14
             """
         ),
         environments="",
@@ -33,7 +33,7 @@ example_queries = [
             temperature: 0.5 # Medium amount of randomness
             stop_sequences: [\\n] # Stop when you hit a newline
             num_completions: 5 # Generate many samples
-            model: openai/gpt-
+            model: openai/gpt-4.1-nano-2025-04-14
             """
         ),
         environments="",
@@ -58,7 +58,7 @@ example_queries = [
             """
             temperature: 0 # Deterministic
             max_tokens: 50
-            model: openai/gpt-
+            model: openai/gpt-4.1-nano-2025-04-14
             """
         ),
         environments="",
@@ -76,7 +76,7 @@ example_queries = [
         environments=dedent(
             """
             occupation: [mathematician, lawyer, doctor]
-            model: [openai/gpt-
+            model: [openai/gpt-4.1-nano-2025-04-14, openai/gpt-4.1-mini-2025-04-14]
             """
         ),
     ),
@@ -101,7 +101,7 @@ example_queries = [
         ),
         environments=dedent(
             """
-            model: [openai/gpt-
+            model: [openai/gpt-4.1-nano-2025-04-14, openai/gpt-4.1-mini-2025-04-14]
             """
         ),
     ),
@@ -136,7 +136,7 @@ example_queries = [
         ),
         environments=dedent(
             """
-            model: [openai/gpt-
+            model: [openai/gpt-4.1-nano-2025-04-14, openai/gpt-4.1-mini-2025-04-14]
             """
         ),
     ),
@@ -144,7 +144,7 @@ example_queries = [
         prompt="Write a Python function that takes two vectors a and b and returns their Euclidean distance.",
         settings=dedent(
             """
-            model: openai/gpt-
+            model: openai/gpt-4.1-nano-2025-04-14
             """
         ),
         environments="",
@@ -161,7 +161,7 @@ example_queries = [
         ),
         environments=dedent(
             """
-            model: [openai/gpt-
+            model: [openai/gpt-4.1-nano-2025-04-14, openai/gpt-4.1-mini-2025-04-14]
             """
         ),
     ),
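For reference, the model: values swapped in above live inside YAML-style settings strings built with textwrap.dedent. A self-contained sketch of the kind of string the first updated example produces (standard library only; the surrounding query object from helm/proxy/example_queries.py is omitted, and the old value is shown truncated as "openai/gpt-" in this diff view):

```python
from textwrap import dedent

# Mirrors the first updated example: a YAML-like settings block that now
# names a concrete model (openai/gpt-4.1-nano-2025-04-14) instead of the
# previous, truncated "openai/gpt-" reference shown in the diff.
settings = dedent(
    """
    temperature: 0.5 # Medium amount of randomness
    stop_sequences: [.] # Stop when you hit a period
    model: openai/gpt-4.1-nano-2025-04-14
    """
)
print(settings)
```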