crfm-helm 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (268) hide show
  1. {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/METADATA +74 -53
  2. {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/RECORD +262 -182
  3. {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +3 -3
  5. helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
  6. helm/benchmark/annotation/air_bench_annotator.py +2 -2
  7. helm/benchmark/annotation/bigcodebench_annotator.py +3 -3
  8. helm/benchmark/annotation/bird_sql_annotator.py +2 -2
  9. helm/benchmark/annotation/chw_care_plan_annotator.py +7 -12
  10. helm/benchmark/annotation/ehr_sql_annotator.py +2 -2
  11. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +7 -7
  12. helm/benchmark/annotation/live_qa_annotator.py +1 -1
  13. helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
  14. helm/benchmark/annotation/model_as_judge.py +12 -16
  15. helm/benchmark/annotation/omni_math_annotator.py +13 -14
  16. helm/benchmark/annotation/wildbench_annotator.py +9 -9
  17. helm/benchmark/executor.py +11 -12
  18. helm/benchmark/metrics/aci_bench_metrics.py +9 -29
  19. helm/benchmark/metrics/bias_word_lists.py +1 -1
  20. helm/benchmark/metrics/chw_care_plan_metrics.py +10 -30
  21. helm/benchmark/metrics/classification_metrics.py +3 -3
  22. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  23. helm/benchmark/metrics/codeinsights_code_efficiency_metrics.py +186 -0
  24. helm/benchmark/metrics/codeinsights_code_evaluation_metrics.py +477 -0
  25. helm/benchmark/metrics/codeinsights_correct_code_metrics.py +366 -0
  26. helm/benchmark/metrics/codeinsights_edge_case_metrics.py +92 -0
  27. helm/benchmark/metrics/codeinsights_metric_specs.py +51 -0
  28. helm/benchmark/metrics/comet_metric.py +1 -1
  29. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +2 -2
  30. helm/benchmark/metrics/copyright_metrics.py +1 -1
  31. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +1 -1
  32. helm/benchmark/metrics/dischargeme_metrics.py +9 -29
  33. helm/benchmark/metrics/efficiency_metrics.py +3 -3
  34. helm/benchmark/metrics/evaluate_reference_metrics.py +1 -1
  35. helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
  36. helm/benchmark/metrics/ifeval_metrics.py +2 -2
  37. helm/benchmark/metrics/image_generation/clip_score_metrics.py +13 -2
  38. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +1 -1
  39. helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
  40. helm/benchmark/metrics/llm_jury_metrics.py +46 -0
  41. helm/benchmark/metrics/lmkt_metric_specs.py +12 -0
  42. helm/benchmark/metrics/lmkt_metrics.py +47 -0
  43. helm/benchmark/metrics/med_dialog_metrics.py +9 -29
  44. helm/benchmark/metrics/medalign_metrics.py +9 -29
  45. helm/benchmark/metrics/medi_qa_metrics.py +9 -29
  46. helm/benchmark/metrics/medication_qa_metrics.py +10 -30
  47. helm/benchmark/metrics/melt_bias_metric.py +234 -0
  48. helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
  49. helm/benchmark/metrics/melt_metric_specs.py +43 -0
  50. helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
  51. helm/benchmark/metrics/mental_health_metrics.py +9 -29
  52. helm/benchmark/metrics/metric_service.py +11 -11
  53. helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
  54. helm/benchmark/metrics/mimic_rrs_metrics.py +9 -29
  55. helm/benchmark/metrics/mtsamples_procedures_metrics.py +9 -29
  56. helm/benchmark/metrics/mtsamples_replicate_metrics.py +9 -29
  57. helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
  58. helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
  59. helm/benchmark/metrics/starr_patient_instructions_metrics.py +9 -29
  60. helm/benchmark/metrics/summac/model_summac.py +2 -3
  61. helm/benchmark/metrics/summarization_metrics.py +2 -1
  62. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +2 -2
  63. helm/benchmark/metrics/toxicity_metrics.py +2 -2
  64. helm/benchmark/metrics/unitxt_metrics.py +3 -4
  65. helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
  66. helm/benchmark/metrics/vision_language/image_utils.py +2 -2
  67. helm/benchmark/model_deployment_registry.py +16 -26
  68. helm/benchmark/presentation/contamination.py +3 -3
  69. helm/benchmark/presentation/create_plots.py +43 -13
  70. helm/benchmark/presentation/run_display.py +13 -0
  71. helm/benchmark/presentation/schema.py +7 -1
  72. helm/benchmark/presentation/summarize.py +84 -61
  73. helm/benchmark/presentation/test_create_plots.py +4 -1
  74. helm/benchmark/reeval_run.py +3 -4
  75. helm/benchmark/reeval_runner.py +3 -3
  76. helm/benchmark/run.py +84 -73
  77. helm/benchmark/run_expander.py +12 -1
  78. helm/benchmark/run_spec_factory.py +7 -6
  79. helm/benchmark/run_specs/arabic_run_specs.py +73 -0
  80. helm/benchmark/run_specs/audio_run_specs.py +52 -8
  81. helm/benchmark/run_specs/bluex_run_specs.py +40 -0
  82. helm/benchmark/run_specs/classic_run_specs.py +0 -53
  83. helm/benchmark/run_specs/codeinsights_run_specs.py +192 -0
  84. helm/benchmark/run_specs/enterprise_run_specs.py +20 -0
  85. helm/benchmark/run_specs/experimental_run_specs.py +31 -1
  86. helm/benchmark/run_specs/healthqa_br_run_specs.py +40 -0
  87. helm/benchmark/run_specs/heim_run_specs.py +3 -1
  88. helm/benchmark/run_specs/lmkt_run_specs.py +144 -0
  89. helm/benchmark/run_specs/long_context_run_specs.py +114 -15
  90. helm/benchmark/run_specs/medhelm_run_specs.py +146 -41
  91. helm/benchmark/run_specs/melt_run_specs.py +783 -0
  92. helm/benchmark/run_specs/multilingual_run_specs.py +50 -0
  93. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +163 -0
  94. helm/benchmark/run_specs/vlm_run_specs.py +28 -0
  95. helm/benchmark/runner.py +5 -5
  96. helm/benchmark/scenarios/aci_bench_scenario.py +7 -1
  97. helm/benchmark/scenarios/alghafa_scenario.py +126 -0
  98. helm/benchmark/scenarios/arabic_mmlu_scenario.py +78 -0
  99. helm/benchmark/scenarios/aratrust_scenario.py +76 -0
  100. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +3 -1
  101. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +5 -5
  102. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +1 -1
  103. helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
  104. helm/benchmark/scenarios/audio_language/mustard_scenario.py +1 -1
  105. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +104 -0
  106. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +99 -0
  107. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +118 -0
  108. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +86 -0
  109. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +117 -0
  110. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +15 -1
  111. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +1 -2
  112. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +2 -2
  113. helm/benchmark/scenarios/bluex_scenario.py +66 -0
  114. helm/benchmark/scenarios/chw_care_plan_scenario.py +14 -13
  115. helm/benchmark/scenarios/clear_scenario.py +11 -7
  116. helm/benchmark/scenarios/cleva_scenario.py +1 -1
  117. helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py +197 -0
  118. helm/benchmark/scenarios/codeinsights_correct_code_scenario.py +78 -0
  119. helm/benchmark/scenarios/codeinsights_edge_case_scenario.py +192 -0
  120. helm/benchmark/scenarios/codeinsights_student_coding_scenario.py +162 -0
  121. helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py +188 -0
  122. helm/benchmark/scenarios/dischargeme_scenario.py +36 -21
  123. helm/benchmark/scenarios/ehr_sql_scenario.py +7 -1
  124. helm/benchmark/scenarios/ehrshot_scenario.py +28 -55
  125. helm/benchmark/scenarios/exams_multilingual_scenario.py +115 -0
  126. helm/benchmark/scenarios/grammar.py +2 -2
  127. helm/benchmark/scenarios/headqa_scenario.py +6 -1
  128. helm/benchmark/scenarios/healthqa_br_scenario.py +80 -0
  129. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +90 -0
  130. helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
  131. helm/benchmark/scenarios/{infinite_bench_sum_scenario.py → infinite_bench_en_sum_scenario.py} +10 -13
  132. helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
  133. helm/benchmark/scenarios/lmkt_scenarios.py +288 -0
  134. helm/benchmark/scenarios/math_scenario.py +21 -20
  135. helm/benchmark/scenarios/med_dialog_scenario.py +6 -1
  136. helm/benchmark/scenarios/medalign_scenario.py +9 -3
  137. helm/benchmark/scenarios/medalign_scenario_helper.py +27 -130
  138. helm/benchmark/scenarios/medbullets_scenario.py +7 -2
  139. helm/benchmark/scenarios/medcalc_bench_scenario.py +4 -2
  140. helm/benchmark/scenarios/medec_scenario.py +6 -1
  141. helm/benchmark/scenarios/medhallu_scenario.py +7 -1
  142. helm/benchmark/scenarios/medi_qa_scenario.py +10 -4
  143. helm/benchmark/scenarios/medication_qa_scenario.py +7 -1
  144. helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
  145. helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
  146. helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
  147. helm/benchmark/scenarios/melt_scenarios.py +793 -0
  148. helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
  149. helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
  150. helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
  151. helm/benchmark/scenarios/mental_health_scenario.py +16 -5
  152. helm/benchmark/scenarios/mimic_bhc_scenario.py +13 -8
  153. helm/benchmark/scenarios/mimic_rrs_scenario.py +17 -8
  154. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +14 -8
  155. helm/benchmark/scenarios/mmlu_pro_scenario.py +1 -1
  156. helm/benchmark/scenarios/mmmlu_scenario.py +85 -0
  157. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +5 -2
  158. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +3 -2
  159. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +11 -5
  160. helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
  161. helm/benchmark/scenarios/pubmed_qa_scenario.py +6 -1
  162. helm/benchmark/scenarios/race_based_med_scenario.py +18 -8
  163. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +2 -2
  164. helm/benchmark/scenarios/ruler_qa_scenarios.py +2 -2
  165. helm/benchmark/scenarios/seahelm_scenario.py +2 -2
  166. helm/benchmark/scenarios/shc_bmt_scenario.py +12 -6
  167. helm/benchmark/scenarios/shc_cdi_scenario.py +11 -6
  168. helm/benchmark/scenarios/shc_conf_scenario.py +12 -6
  169. helm/benchmark/scenarios/shc_ent_scenario.py +11 -6
  170. helm/benchmark/scenarios/shc_gip_scenario.py +13 -5
  171. helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
  172. helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
  173. helm/benchmark/scenarios/shc_ptbm_scenario.py +12 -7
  174. helm/benchmark/scenarios/shc_sei_scenario.py +12 -7
  175. helm/benchmark/scenarios/shc_sequoia_scenario.py +13 -5
  176. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +15 -8
  177. helm/benchmark/scenarios/test_alghafa_scenario.py +29 -0
  178. helm/benchmark/scenarios/test_aratrust_scenario.py +21 -0
  179. helm/benchmark/scenarios/test_bluex_scenario.py +59 -0
  180. helm/benchmark/scenarios/test_exams_multilingual_scenario.py +29 -0
  181. helm/benchmark/scenarios/test_healtha_br_scenario.py +57 -0
  182. helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
  183. helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
  184. helm/benchmark/scenarios/truthful_qa_scenario.py +2 -1
  185. helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
  186. helm/benchmark/server.py +2 -1
  187. helm/benchmark/slurm_jobs.py +1 -2
  188. helm/benchmark/slurm_runner.py +8 -1
  189. helm/benchmark/static/schema_arabic.yaml +228 -0
  190. helm/benchmark/static/schema_audio.yaml +60 -49
  191. helm/benchmark/static/schema_classic.yaml +0 -17
  192. helm/benchmark/static/schema_enterprise.yaml +21 -0
  193. helm/benchmark/static/schema_long_context.yaml +81 -20
  194. helm/benchmark/static/schema_medhelm.yaml +272 -213
  195. helm/benchmark/static/schema_melt.yaml +1257 -0
  196. helm/benchmark/static/schema_slphelm.yaml +162 -0
  197. helm/benchmark/static/schema_vhelm.yaml +26 -26
  198. helm/benchmark/static/schema_video.yaml +219 -0
  199. helm/benchmark/static_build/assets/index-b9779128.css +1 -0
  200. helm/benchmark/static_build/assets/index-e439d5e1.js +10 -0
  201. helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
  202. helm/benchmark/static_build/assets/{tremor-9cefc3c5.js → tremor-38a10867.js} +1 -1
  203. helm/benchmark/static_build/index.html +4 -4
  204. helm/benchmark/window_services/encoder_decoder_window_service.py +3 -3
  205. helm/benchmark/window_services/image_generation/clip_window_service.py +1 -3
  206. helm/benchmark/window_services/test_utils.py +3 -4
  207. helm/benchmark/window_services/tokenizer_service.py +7 -8
  208. helm/clients/anthropic_client.py +69 -29
  209. helm/clients/audio_language/diva_llama_client.py +4 -2
  210. helm/clients/audio_language/qwen2_5_omni_client.py +209 -0
  211. helm/clients/audio_language/qwen2_audiolm_client.py +8 -6
  212. helm/clients/audio_language/qwen_audiolm_client.py +4 -2
  213. helm/clients/audio_language/test.py +62 -0
  214. helm/clients/bedrock_client.py +3 -1
  215. helm/clients/client.py +7 -7
  216. helm/clients/grok_client.py +36 -0
  217. helm/clients/huggingface_client.py +42 -3
  218. helm/clients/huggingface_pipeline_client.py +138 -0
  219. helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
  220. helm/clients/image_generation/dalle_mini/model/modeling.py +1 -1
  221. helm/clients/image_generation/dalle_mini/model/processor.py +1 -1
  222. helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
  223. helm/clients/openai_client.py +102 -55
  224. helm/clients/openai_responses_client.py +176 -0
  225. helm/clients/palmyra_client.py +2 -5
  226. helm/clients/reka_client.py +2 -2
  227. helm/clients/test_huggingface_client.py +3 -3
  228. helm/clients/together_client.py +31 -6
  229. helm/clients/vertexai_client.py +17 -9
  230. helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
  231. helm/clients/vision_language/huggingface_vlm_client.py +2 -2
  232. helm/clients/vision_language/idefics_client.py +6 -2
  233. helm/clients/vision_language/paligemma_client.py +2 -2
  234. helm/clients/vision_language/qwen2_vlm_client.py +66 -53
  235. helm/clients/vision_language/qwen_vlm_client.py +7 -5
  236. helm/clients/vllm_client.py +43 -7
  237. helm/clients/vllm_granite_thinking_client.py +56 -0
  238. helm/clients/writer_client.py +102 -0
  239. helm/common/context.py +80 -0
  240. helm/common/credentials_utils.py +5 -5
  241. helm/common/critique_request.py +0 -1
  242. helm/common/general.py +9 -2
  243. helm/common/hierarchical_logger.py +104 -12
  244. helm/common/local_context.py +140 -0
  245. helm/common/object_spec.py +23 -8
  246. helm/common/remote_context.py +61 -0
  247. helm/common/request.py +8 -0
  248. helm/common/test_logging.py +94 -0
  249. helm/config/model_deployments.yaml +995 -45
  250. helm/config/model_metadata.yaml +780 -59
  251. helm/config/tokenizer_configs.yaml +224 -3
  252. helm/proxy/cli.py +4 -2
  253. helm/proxy/critique/mechanical_turk_utils.py +1 -1
  254. helm/proxy/retry.py +5 -0
  255. helm/proxy/services/server_service.py +21 -85
  256. helm/tokenizers/grok_tokenizer.py +55 -0
  257. helm/tokenizers/huggingface_tokenizer.py +1 -1
  258. helm/tokenizers/test_grok_tokenizer.py +33 -0
  259. helm/benchmark/metrics/numeracy_metrics.py +0 -72
  260. helm/benchmark/metrics/test_numeracy_metrics.py +0 -95
  261. helm/benchmark/scenarios/numeracy_scenario.py +0 -793
  262. helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +0 -46
  263. helm/benchmark/static_build/assets/index-262903c1.js +0 -10
  264. helm/benchmark/static_build/assets/index-42060d71.css +0 -1
  265. {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/entry_points.txt +0 -0
  266. {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/licenses/LICENSE +0 -0
  267. {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/top_level.txt +0 -0
  268. /helm/benchmark/static_build/assets/{medhelm-overview-3ddfcd65.png → medhelm-v1-overview-3ddfcd65.png} +0 -0
@@ -276,6 +276,14 @@ models:
276
276
  # Amazon Nova models
277
277
  # References for Amazon Nova models:
278
278
  # https://aws.amazon.com/ai/generative-ai/nova/
279
+ - name: amazon/nova-premier-v1:0
280
+ display_name: Amazon Nova Premier
281
+ description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
282
+ creator_organization_name: Amazon
283
+ access: limited
284
+ release_date: 2025-04-30
285
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
286
+
279
287
  - name: amazon/nova-pro-v1:0
280
288
  display_name: Amazon Nova Pro
281
289
  description: Amazon Nova Pro Model
@@ -507,6 +515,46 @@ models:
507
515
  release_date: 2025-02-24
508
516
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
509
517
 
518
+ - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
519
+ display_name: Claude 3.7 Sonnet (20250219, extended thinking)
520
+ description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with 10k budget tokens.
521
+ creator_organization_name: Anthropic
522
+ access: limited
523
+ release_date: 2025-02-24
524
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
525
+
526
+ - name: anthropic/claude-sonnet-4-20250514
527
+ display_name: Claude 4 Sonnet (20250514)
528
+ description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
529
+ creator_organization_name: Anthropic
530
+ access: limited
531
+ release_date: 2025-05-14
532
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
533
+
534
+ - name: anthropic/claude-sonnet-4-20250514-thinking-10k
535
+ display_name: Claude 4 Sonnet (20250514, extended thinking)
536
+ description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
537
+ creator_organization_name: Anthropic
538
+ access: limited
539
+ release_date: 2025-05-14
540
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
541
+
542
+ - name: anthropic/claude-opus-4-20250514
543
+ display_name: Claude 4 Opus (20250514)
544
+ description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
545
+ creator_organization_name: Anthropic
546
+ access: limited
547
+ release_date: 2025-05-14
548
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
549
+
550
+ - name: anthropic/claude-opus-4-20250514-thinking-10k
551
+ display_name: Claude 4 Opus (20250514, extended thinking)
552
+ description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
553
+ creator_organization_name: Anthropic
554
+ access: limited
555
+ release_date: 2025-05-14
556
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
557
+
510
558
  - name: anthropic/stanford-online-all-v4-s3
511
559
  display_name: Anthropic-LM v4-s3 (52B)
512
560
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -868,7 +916,7 @@ models:
868
916
  # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
869
917
  num_parameters: 685000000000
870
918
  release_date: 2025-01-20
871
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
919
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
872
920
 
873
921
  - name: deepseek-ai/deepseek-r1-hide-reasoning
874
922
  display_name: DeepSeek R1 (hide reasoning)
@@ -878,8 +926,35 @@ models:
878
926
  # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
879
927
  num_parameters: 685000000000
880
928
  release_date: 2025-01-20
881
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
929
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
882
930
 
931
+ - name: deepseek-ai/deepseek-r1-0528
932
+ display_name: DeepSeek-R1-0528
933
+ description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948))
934
+ creator_organization_name: DeepSeek
935
+ access: open
936
+ num_parameters: 685000000000
937
+ release_date: 2025-05-28
938
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
939
+
940
+ - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
941
+ display_name: DeepSeek-R1-Distill-Llama-8b
942
+ description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
943
+ creator_organization_name: DeepSeek
944
+ access: open
945
+ num_parameters: 8000000000
946
+ release_date: 2025-01-20
947
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
948
+
949
+ - name: deepseek-ai/deepseek-coder-6.7b-instruct
950
+ display_name: DeepSeek-Coder-6.7b-Instruct
951
+ description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
952
+ creator_organization_name: DeepSeek
953
+ access: open
954
+ num_parameters: 6740000000
955
+ release_date: 2025-01-20
956
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
957
+
883
958
  # EleutherAI
884
959
  - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
885
960
  display_name: GPT-J (6B)
@@ -1146,6 +1221,14 @@ models:
1146
1221
  release_date: 2025-02-05
1147
1222
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1148
1223
 
1224
+ - name: google/gemini-2.0-flash-lite-001
1225
+ display_name: Gemini 2.0 Flash Lite
1226
+ description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1227
+ creator_organization_name: Google
1228
+ access: limited
1229
+ release_date: 2025-03-25
1230
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1231
+
1149
1232
  - name: google/gemini-2.0-flash-thinking-exp-01-21
1150
1233
  display_name: Gemini 2.0 Flash Thinking (01-21 preview)
1151
1234
  description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
@@ -1162,6 +1245,70 @@ models:
1162
1245
  release_date: 2025-02-05
1163
1246
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1164
1247
 
1248
+ - name: google/gemini-2.5-flash-lite-preview-06-17
1249
+ display_name: Gemini 2.5 Flash-Lite (06-17 preview)
1250
+ description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
1251
+ creator_organization_name: Google
1252
+ access: limited
1253
+ release_date: 2025-06-17
1254
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1255
+
1256
+ - name: google/gemini-2.5-flash-preview-04-17
1257
+ display_name: Gemini 2.5 Flash (04-17 preview)
1258
+ description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1259
+ creator_organization_name: Google
1260
+ access: limited
1261
+ release_date: 2025-04-17
1262
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1263
+
1264
+ - name: google/gemini-2.5-flash-preview-05-20
1265
+ display_name: Gemini 2.5 Flash (05-20 preview)
1266
+ description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1267
+ creator_organization_name: Google
1268
+ access: limited
1269
+ release_date: 2025-04-17
1270
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1271
+
1272
+ - name: google/gemini-2.5-flash
1273
+ display_name: Gemini 2.5 Flash
1274
+ description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1275
+ creator_organization_name: Google
1276
+ access: limited
1277
+ release_date: 2025-06-17
1278
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1279
+
1280
+ - name: google/gemini-2.5-pro-exp-03-25
1281
+ display_name: Gemini 2.5 Pro (03-25 experimental)
1282
+ description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1283
+ creator_organization_name: Google
1284
+ access: limited
1285
+ release_date: 2025-03-25
1286
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1287
+
1288
+ - name: google/gemini-2.5-pro-preview-03-25
1289
+ display_name: Gemini 2.5 Pro (03-25 preview)
1290
+ description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1291
+ creator_organization_name: Google
1292
+ access: limited
1293
+ release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
1294
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1295
+
1296
+ - name: google/gemini-2.5-pro-preview-05-06
1297
+ display_name: Gemini 2.5 Pro (05-06 preview)
1298
+ description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1299
+ creator_organization_name: Google
1300
+ access: limited
1301
+ release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
1302
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1303
+
1304
+ - name: google/gemini-2.5-pro
1305
+ display_name: Gemini 2.5 Pro
1306
+ description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1307
+ creator_organization_name: Google
1308
+ access: limited
1309
+ release_date: 2025-06-17
1310
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1311
+
1165
1312
  - name: google/gemma-2b
1166
1313
  display_name: Gemma (2B)
1167
1314
  description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
@@ -1360,6 +1507,60 @@ models:
1360
1507
  release_date: 2023-08-22
1361
1508
  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
1362
1509
 
1510
+ - name: huggingface/smollm2-135m
1511
+ display_name: SmolLM2 (135M)
1512
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1513
+ creator_organization_name: HuggingFace
1514
+ access: open
1515
+ num_parameters: 135000000
1516
+ release_date: 2024-10-31
1517
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1518
+
1519
+ - name: huggingface/smollm2-360m
1520
+ display_name: SmolLM2 (360M)
1521
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1522
+ creator_organization_name: HuggingFace
1523
+ access: open
1524
+ num_parameters: 362000000
1525
+ release_date: 2024-10-31
1526
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1527
+
1528
+ - name: huggingface/smollm2-1.7b
1529
+ display_name: SmolLM2 (1.7B)
1530
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1531
+ creator_organization_name: HuggingFace
1532
+ access: open
1533
+ num_parameters: 1710000000
1534
+ release_date: 2024-10-31
1535
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1536
+
1537
+ - name: huggingface/smollm2-135m-instruct
1538
+ display_name: SmolLM2 Instruct (135M)
1539
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1540
+ creator_organization_name: HuggingFace
1541
+ access: open
1542
+ num_parameters: 135000000
1543
+ release_date: 2024-10-31
1544
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1545
+
1546
+ - name: huggingface/smollm2-360m-instruct
1547
+ display_name: SmolLM2 Instruct (360M)
1548
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1549
+ creator_organization_name: HuggingFace
1550
+ access: open
1551
+ num_parameters: 362000000
1552
+ release_date: 2024-10-31
1553
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1554
+
1555
+ - name: huggingface/smollm2-1.7b-instruct
1556
+ display_name: SmolLM2 Instruct (1.7B)
1557
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1558
+ creator_organization_name: HuggingFace
1559
+ access: open
1560
+ num_parameters: 1710000000
1561
+ release_date: 2024-10-31
1562
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1563
+
1363
1564
  ## Text-to-Image Diffusion Models
1364
1565
  - name: huggingface/dreamlike-diffusion-v1-0
1365
1566
  display_name: Dreamlike Diffusion v1.0 (1B)
@@ -1573,6 +1774,16 @@ models:
1573
1774
  release_date: 2023-06-22
1574
1775
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1575
1776
 
1777
+ # Marin Community
1778
+ - name: marin-community/marin-8b-instruct
1779
+ display_name: Marin 8B Instruct
1780
+ description: Marin 8B Instruct is an instruction-tuned version of Marin 8B, a fully open 8B-parameter language model developed by the Marin community. ([website](https://marin.community/))
1781
+ creator_organization_name: Marin Community
1782
+ access: open
1783
+ num_parameters: 8030000000
1784
+ release_date: 2025-05-15
1785
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1786
+
1576
1787
  # Meta
1577
1788
  - name: meta/opt-iml-175b # NOT SUPPORTED
1578
1789
  display_name: OPT-IML (175B)
@@ -1875,6 +2086,24 @@ models:
1875
2086
  release_date: 2024-12-06
1876
2087
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1877
2088
 
2089
+ - name: meta/llama-4-scout-17b-16e-instruct
2090
+ display_name: Llama 4 Scout (17Bx16E) Instruct
2091
+ description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection of models, which are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
2092
+ creator_organization_name: Meta
2093
+ access: open
2094
+ num_parameters: 109000000000
2095
+ release_date: 2025-04-05
2096
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2097
+
2098
+ - name: meta/llama-4-maverick-17b-128e-instruct-fp8
2099
+ display_name: Llama 4 Maverick (17Bx128E) Instruct FP8
2100
+ description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection of models, which are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
2101
+ creator_organization_name: Meta
2102
+ access: open
2103
+ num_parameters: 402000000000
2104
+ release_date: 2025-04-05
2105
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2106
+
1878
2107
  - name: meta/llama-3-8b-chat
1879
2108
  display_name: Llama 3 Instruct (8B)
1880
2109
  description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
@@ -2150,6 +2379,42 @@ models:
2150
2379
  release_date: 2024-04-17
2151
2380
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2152
2381
 
2382
+ - name: allenai/olmo-2-1124-7b-instruct
2383
+ display_name: OLMo 2 7B Instruct November 2024
2384
+ description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
2385
+ creator_organization_name: Allen Institute for AI
2386
+ access: open
2387
+ num_parameters: 7300000000
2388
+ release_date: 2024-11-26
2389
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2390
+
2391
+ - name: allenai/olmo-2-1124-13b-instruct
2392
+ display_name: OLMo 2 13B Instruct November 2024
2393
+ description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
2394
+ creator_organization_name: Allen Institute for AI
2395
+ access: open
2396
+ num_parameters: 13700000000
2397
+ release_date: 2024-11-26
2398
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2399
+
2400
+ - name: allenai/olmo-2-0325-32b-instruct
2401
+ display_name: OLMo 2 32B Instruct March 2025
2402
+ description: OLMo 2 32B Instruct March 2025 is trained up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B))
2403
+ creator_organization_name: Allen Institute for AI
2404
+ access: open
2405
+ num_parameters: 32200000000
2406
+ release_date: 2025-03-13
2407
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2408
+
2409
+ - name: allenai/olmoe-1b-7b-0125-instruct
2410
+ display_name: OLMoE 1B-7B Instruct January 2025
2411
+ description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060))
2412
+ creator_organization_name: Allen Institute for AI
2413
+ access: open
2414
+ num_parameters: 6919000000
2415
+ release_date: 2025-01-31
2416
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2417
+
2153
2418
  # Mistral AI
2154
2419
  - name: mistralai/mistral-7b-v0.1
2155
2420
  display_name: Mistral v0.1 (7B)
@@ -2300,6 +2565,14 @@ models:
2300
2565
  release_date: 2023-12-11
2301
2566
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2302
2567
 
2568
+ - name: mistralai/mistral-medium-2505
2569
+ display_name: Mistral Medium 3 (2505)
2570
+ description: Mistral Medium 3 (2505) is a language model that is intended to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
2571
+ creator_organization_name: Mistral AI
2572
+ access: limited
2573
+ release_date: 2025-05-07
2574
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2575
+
2303
2576
  - name: mistralai/mistral-large-2402
2304
2577
  display_name: Mistral Large (2402)
2305
2578
  description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -2351,6 +2624,15 @@ models:
2351
2624
  release_date: 2024-11-18
2352
2625
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2353
2626
 
2627
+ # Moonshot AI
2628
+ - name: moonshotai/kimi-k2-instruct
2629
+ display_name: Kimi K2 Instruct
2630
+ description: Kimi K2 Instruct is a mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters trained with the Muon optimizer on 15.5T tokens. ([blog](https://moonshotai.github.io/Kimi-K2/))
2631
+ creator_organization_name: Moonshot AI
2632
+ access: open
2633
+ num_parameters: 1029173256720
2634
+ release_date: 2025-07-14 # Blog post has no date, so use the date from this news article https://www.cnbc.com/2025/07/14/alibaba-backed-moonshot-releases-kimi-k2-ai-rivaling-chatgpt-claude.html
2635
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2354
2636
 
2355
2637
  # MosaicML
2356
2638
  - name: mosaicml/mpt-7b
@@ -2746,6 +3028,30 @@ models:
2746
3028
  release_date: 2024-07-18
2747
3029
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2748
3030
 
3031
+ - name: openai/gpt-4.1-2025-04-14
3032
+ display_name: GPT-4.1 (2025-04-14)
3033
+ description: GPT-4.1 (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
3034
+ creator_organization_name: OpenAI
3035
+ access: limited
3036
+ release_date: 2025-04-14
3037
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3038
+
3039
+ - name: openai/gpt-4.1-mini-2025-04-14
3040
+ display_name: GPT-4.1 mini (2025-04-14)
3041
+ description: GPT-4.1 mini (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
3042
+ creator_organization_name: OpenAI
3043
+ access: limited
3044
+ release_date: 2025-04-14
3045
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3046
+
3047
+ - name: openai/gpt-4.1-nano-2025-04-14
3048
+ display_name: GPT-4.1 nano (2025-04-14)
3049
+ description: GPT-4.1 nano (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
3050
+ creator_organization_name: OpenAI
3051
+ access: limited
3052
+ release_date: 2025-04-14
3053
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3054
+
2749
3055
  - name: openai/whisper-1_gpt-4o-2024-11-20
2750
3056
  display_name: Whisper-1 + GPT-4o (2024-11-20)
2751
3057
  description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
@@ -2754,6 +3060,22 @@ models:
2754
3060
  release_date: 2024-11-20
2755
3061
  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
2756
3062
 
3063
+ - name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
3064
+ display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
3065
+ description: Transcribes the text with GPT-4o Transcribe and then uses GPT-4o to generate a response.
3066
+ creator_organization_name: OpenAI
3067
+ access: limited
3068
+ release_date: 2025-03-20
3069
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
3070
+
3071
+ - name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
3072
+ display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
3073
+ description: Transcribes the text with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
3074
+ creator_organization_name: OpenAI
3075
+ access: limited
3076
+ release_date: 2025-03-20
3077
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
3078
+
2757
3079
  - name: openai/gpt-4o-audio-preview-2024-10-01
2758
3080
  display_name: GPT-4o Audio (Preview 2024-10-01)
2759
3081
  description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using use audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
@@ -2807,6 +3129,30 @@ models:
2807
3129
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2808
3130
 
2809
3131
  ## o1 Models
3132
+ - name: openai/o1-pro-2025-03-19
3133
+ display_name: o1 pro (2025-03-19)
3134
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
3135
+ creator_organization_name: OpenAI
3136
+ access: limited
3137
+ release_date: 2025-03-19
3138
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3139
+
3140
+ - name: openai/o1-pro-2025-03-19-low-reasoning-effort
3141
+ display_name: o1 pro (2025-03-19, low reasoning effort)
3142
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
3143
+ creator_organization_name: OpenAI
3144
+ access: limited
3145
+ release_date: 2025-03-19
3146
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3147
+
3148
+ - name: openai/o1-pro-2025-03-19-high-reasoning-effort
3149
+ display_name: o1 pro (2025-03-19, high reasoning effort)
3150
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
3151
+ creator_organization_name: OpenAI
3152
+ access: limited
3153
+ release_date: 2025-03-19
3154
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3155
+
2810
3156
  - name: openai/o1-2024-12-17
2811
3157
  display_name: o1 (2024-12-17)
2812
3158
  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
@@ -2871,6 +3217,62 @@ models:
2871
3217
  release_date: 2025-01-31
2872
3218
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2873
3219
 
3220
+ - name: openai/o3-2025-04-16
3221
+ display_name: o3 (2025-04-16)
3222
+ description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3223
+ creator_organization_name: OpenAI
3224
+ access: limited
3225
+ release_date: 2025-04-16
3226
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3227
+
3228
+ - name: openai/o3-2025-04-16-low-reasoning-effort
3229
+ display_name: o3 (2025-04-16, low reasoning effort)
3230
+ description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3231
+ creator_organization_name: OpenAI
3232
+ access: limited
3233
+ release_date: 2025-04-16
3234
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3235
+
3236
+ - name: openai/o3-2025-04-16-high-reasoning-effort
3237
+ display_name: o3 (2025-04-16, high reasoning effort)
3238
+ description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3239
+ creator_organization_name: OpenAI
3240
+ access: limited
3241
+ release_date: 2025-04-16
3242
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3243
+
3244
+ - name: openai/o4-mini-2025-04-16
3245
+ display_name: o4-mini (2025-04-16)
3246
+ description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3247
+ creator_organization_name: OpenAI
3248
+ access: limited
3249
+ release_date: 2025-04-16
3250
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3251
+
3252
+ - name: openai/o4-mini-2025-04-16-low-reasoning-effort
3253
+ display_name: o4-mini (2025-04-16, low reasoning effort)
3254
+ description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3255
+ creator_organization_name: OpenAI
3256
+ access: limited
3257
+ release_date: 2025-04-16
3258
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3259
+
3260
+ - name: openai/o4-mini-2025-04-16-high-reasoning-effort
3261
+ display_name: o4-mini (2025-04-16, high reasoning effort)
3262
+ description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3263
+ creator_organization_name: OpenAI
3264
+ access: limited
3265
+ release_date: 2025-04-16
3266
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3267
+
3268
+ - name: openai/o3-pro-2025-06-10-high-reasoning-effort
3269
+ display_name: o3-pro (2025-06-10, high reasoning effort)
3270
+ description: o3-pro is an o-series model designed to think longer and provide the most reliable responses. ([blog post](https://help.openai.com/en/articles/9624314-model-release-notes))
3271
+ creator_organization_name: OpenAI
3272
+ access: limited
3273
+ release_date: 2025-06-10
3274
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3275
+
2874
3276
  ## Codex Models
2875
3277
  # DEPRECATED: Codex models have been shut down on March 23 2023.
2876
3278
 
@@ -3139,6 +3541,14 @@ models:
3139
3541
  release_date: 2024-09-19
3140
3542
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3141
3543
 
3544
+ - name: qwen/qwen3-235b-a22b-fp8-tput
3545
+ display_name: Qwen3 235B A22B FP8 Throughput
3546
+ description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)).
3547
+ creator_organization_name: Qwen
3548
+ access: open
3549
+ release_date: 2025-04-29
3550
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3551
+
3142
3552
  - name: qwen/qwq-32b-preview
3143
3553
  display_name: QwQ (32B Preview)
3144
3554
  description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
@@ -3180,6 +3590,38 @@ models:
3180
3590
  release_date: 2024-08-29
3181
3591
  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3182
3592
 
3593
+ - name: qwen/qwen2.5-vl-3b-instruct
3594
+ display_name: Qwen2.5-VL Instruct (3B)
3595
+ description: Qwen2.5-VL is the new flagship vision-language model series of Qwen ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3596
+ creator_organization_name: Alibaba Group
3597
+ access: open
3598
+ release_date: 2025-01-26
3599
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3600
+
3601
+ - name: qwen/qwen2.5-vl-7b-instruct
3602
+ display_name: Qwen2.5-VL Instruct (7B)
3603
+ description: Qwen2.5-VL is the new flagship vision-language model series of Qwen ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3604
+ creator_organization_name: Alibaba Group
3605
+ access: open
3606
+ release_date: 2025-01-26
3607
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3608
+
3609
+ - name: qwen/qwen2.5-vl-32b-instruct
3610
+ display_name: Qwen2.5-VL Instruct (32B)
3611
+ description: Qwen2.5-VL is the new flagship vision-language model series of Qwen ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3612
+ creator_organization_name: Alibaba Group
3613
+ access: open
3614
+ release_date: 2025-01-26
3615
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3616
+
3617
+ - name: qwen/qwen2.5-vl-72b-instruct
3618
+ display_name: Qwen2.5-VL Instruct (72B)
3619
+ description: Qwen2.5-VL is the new flagship vision-language model series of Qwen ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3620
+ creator_organization_name: Alibaba Group
3621
+ access: open
3622
+ release_date: 2025-01-26
3623
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3624
+
3183
3625
  - name: qwen/qwen-audio-chat
3184
3626
  display_name: Qwen-Audio Chat
3185
3627
  description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
@@ -3196,6 +3638,14 @@ models:
3196
3638
  release_date: 2024-07-15
3197
3639
  tags: [AUDIO_LANGUAGE_MODEL_TAG]
3198
3640
 
3641
+ - name: qwen/qwen2.5-omni-7b
3642
+ display_name: Qwen2.5-Omni (7B)
3643
+ description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)).
3644
+ creator_organization_name: Alibaba Cloud
3645
+ access: open
3646
+ release_date: 2025-03-27
3647
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3648
+
3199
3649
  # SAIL (Sea AI Lab)
3200
3650
  - name: sail/sailor-7b
3201
3651
  display_name: Sailor (7B)
@@ -3670,6 +4120,14 @@ models:
3670
4120
  release_date: 2024-09-12
3671
4121
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3672
4122
 
4123
+ - name: writer/palmyra-x5
4124
+ display_name: Palmyra X5
4125
+ description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
4126
+ creator_organization_name: Writer
4127
+ access: limited
4128
+ release_date: 2024-04-28
4129
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4130
+
3673
4131
  - name: writer/palmyra-med-32k
3674
4132
  display_name: Palmyra-Med 32K (70B)
3675
4133
  description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
@@ -3680,11 +4138,10 @@ models:
3680
4138
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3681
4139
 
3682
4140
  - name: writer/palmyra-med
3683
- display_name: Palmyra-Med (70B)
3684
- description: Palmyra-Med (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
4141
+ display_name: Palmyra Med
4142
+ description: Palmyra Med is a model intended for medical applications.
3685
4143
  creator_organization_name: Writer
3686
4144
  access: open
3687
- num_parameters: 70600000000
3688
4145
  release_date: 2024-07-31
3689
4146
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3690
4147
 
@@ -3707,12 +4164,28 @@ models:
3707
4164
 
3708
4165
  # xAI
3709
4166
 
3710
- - name: xai/grok-beta
3711
- display_name: Grok Beta
3712
- description: Grok Beta is a model from xAI.
4167
+ - name: xai/grok-3-beta
4168
+ display_name: Grok 3 Beta
4169
+ description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
3713
4170
  creator_organization_name: xAI
3714
- access: closed
3715
- release_date: 2024-08-13
4171
+ access: limited
4172
+ release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
4173
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4174
+
4175
+ - name: xai/grok-3-mini-beta
4176
+ display_name: Grok 3 mini Beta
4177
+ description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
4178
+ creator_organization_name: xAI
4179
+ access: limited
4180
+ release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
4181
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4182
+
4183
+ - name: xai/grok-4-0709
4184
+ display_name: Grok 4 (0709)
4185
+ description: Grok 4 (0709) is a model that includes native tool use and real-time search integration. ([blog](https://x.ai/news/grok-4))
4186
+ creator_organization_name: xAI
4187
+ access: limited
4188
+ release_date: 2025-07-09
3716
4189
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3717
4190
 
3718
4191
  # Yandex
@@ -3807,6 +4280,42 @@ models:
3807
4280
  release_date: 2024-09-10
3808
4281
  tags: [AUDIO_LANGUAGE_MODEL_TAG]
3809
4282
 
4283
+
4284
+ # Maritaca AI
4285
+ - name: maritaca-ai/sabia-7b
4286
+ display_name: Sabia 7B
4287
+ description: Sabia 7B
4288
+ creator_organization_name: MARITACA-AI
4289
+ access: open
4290
+ num_parameters: 6740000000
4291
+ release_date: 2023-11-08
4292
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4293
+
4294
+ - name: maritaca-ai/sabiazinho-3
4295
+ display_name: Sabiazinho 3
4296
+ description: Sabiazinho-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
4297
+ creator_organization_name: Maritaca AI
4298
+ access: limited
4299
+ release_date: 2025-02-06
4300
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4301
+
4302
+ - name: maritaca-ai/sabia-3
4303
+ display_name: Sabiá 3
4304
+ description: Sabiá-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to July 2023.
4305
+ creator_organization_name: Maritaca AI
4306
+ access: limited
4307
+ release_date: 2024-12-11
4308
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4309
+
4310
+ - name: maritaca-ai/sabia-3.1-2025-05-08
4311
+ display_name: Sabiá 3.1
4312
+ description: Sabiá-3.1 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to August 2024.
4313
+ creator_organization_name: Maritaca AI
4314
+ access: limited
4315
+ release_date: 2025-05-08
4316
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4317
+
4318
+
3810
4319
  # Granite - IBM
3811
4320
  # https://www.ibm.com/granite
3812
4321
  # https://github.com/ibm-granite/granite-3.0-language-models
@@ -3882,17 +4391,7 @@ models:
3882
4391
  num_parameters: 1380000000
3883
4392
  release: 2024-10-21
3884
4393
  tags: [TEXT_MODEL_TAG]
3885
-
3886
- - name: maritaca-ai/sabia-7b
3887
- display_name: Sabia 7B
3888
- description: Sabia 7B
3889
- creator_organization_name: MARITACA-AI
3890
- access: open
3891
- num_parameters: 6740000000
3892
- release_date: 2023-11-08
3893
- tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3894
4394
 
3895
- # Granite-3.1-8b-base
3896
4395
  - name: ibm-granite/granite-3.1-8b-base
3897
4396
  display_name: Granite 3.1 - 8B - Base
3898
4397
  description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3902,7 +4401,6 @@ models:
3902
4401
  release_date: 2024-12-18
3903
4402
  tags: [TEXT_MODEL_TAG]
3904
4403
 
3905
- # Granite-3.1-8b-instruct
3906
4404
  - name: ibm-granite/granite-3.1-8b-instruct
3907
4405
  display_name: Granite 3.1 - 8B - Instruct
3908
4406
  description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3912,7 +4410,6 @@ models:
3912
4410
  release_date: 2024-12-18
3913
4411
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3914
4412
 
3915
- # Granite-3.1-2b-instruct
3916
4413
  - name: ibm-granite/granite-3.1-2b-instruct
3917
4414
  display_name: Granite 3.1 - 2B - Instruct
3918
4415
  description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3922,7 +4419,6 @@ models:
3922
4419
  release_date: 2024-12-18
3923
4420
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3924
4421
 
3925
- # Granite-3.1-2b-base
3926
4422
  - name: ibm-granite/granite-3.1-2b-base
3927
4423
  display_name: Granite 3.1 - 2B - Base
3928
4424
  description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3932,7 +4428,6 @@ models:
3932
4428
  release_date: 2024-12-18
3933
4429
  tags: [TEXT_MODEL_TAG]
3934
4430
 
3935
- # Granite-3.1-3b-a800m-instruct
3936
4431
  - name: ibm-granite/granite-3.1-3b-a800m-instruct
3937
4432
  display_name: Granite 3.1 - 3B - A800M - Instruct
3938
4433
  description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3942,7 +4437,6 @@ models:
3942
4437
  release_date: 2024-12-18
3943
4438
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3944
4439
 
3945
- # Granite-3.1-3b-a800m-base
3946
4440
  - name: ibm-granite/granite-3.1-3b-a800m-base
3947
4441
  display_name: Granite 3.1 - 3B - A800M - Base
3948
4442
  description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3952,7 +4446,6 @@ models:
3952
4446
  release_date: 2024-12-18
3953
4447
  tags: [TEXT_MODEL_TAG]
3954
4448
 
3955
- # Granite-3.1-1b-a400m-instruct
3956
4449
  - name: ibm-granite/granite-3.1-1b-a400m-instruct
3957
4450
  display_name: Granite 3.1 - 1B - A400M - Instruct
3958
4451
  description: Granite-3.1-1B-A400M-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3962,7 +4455,6 @@ models:
3962
4455
  release_date: 2024-12-18
3963
4456
  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3964
4457
 
3965
- # Granite-3.1-1b-a400m-base
3966
4458
  - name: ibm-granite/granite-3.1-1b-a400m-base
3967
4459
  display_name: Granite 3.1 - 1B - A400M - Base
3968
4460
  description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3972,27 +4464,6 @@ models:
3972
4464
  release_date: 2024-12-18
3973
4465
  tags: [TEXT_MODEL_TAG]
3974
4466
 
3975
- # DeepSeek-R1-Distill-Llama-3.1-8b
3976
- - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
3977
- display_name: DeepSeek-R1-Distill-Llama-8b
3978
- description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
3979
- creator_organization_name: DeepSeek
3980
- access: open
3981
- num_parameters: 8000000000
3982
- release_date: 2025-01-20
3983
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3984
-
3985
- # deepseek-ai/deepseek-coder-6.7b-instruct
3986
- - name: deepseek-ai/deepseek-coder-6.7b-instruct
3987
- display_name: DeepSeek-Coder-6.7b-Instruct
3988
- description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
3989
- creator_organization_name: DeepSeek
3990
- access: open
3991
- num_parameters: 6740000000
3992
- release_date: 2025-01-20
3993
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3994
-
3995
- # WatsonX - IBM
3996
4467
  - name: ibm/granite-13b-instruct-v2
3997
4468
  display_name: Granite 13b instruct v2
3998
4469
  description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model. The following features were used in the design of the model Decoder-only model
@@ -4039,11 +4510,6 @@ models:
4039
4510
  release: 2024-6-18
4040
4511
  tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4041
4512
 
4042
-
4043
-
4044
-
4045
-
4046
-
4047
4513
  - name: ibm/granite-3.1-8b-instruct
4048
4514
  display_name: Granite 3.1 - 8B - Instruct
4049
4515
  description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -4062,10 +4528,265 @@ models:
4062
4528
  release_date: 2024-12-18
4063
4529
  tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4064
4530
 
4065
- - name: mistralai/mixtral-8x7b-instruct-v0:1
4066
- display_name: Mixtral 8x7B Instruct on IBM WatsonX
4067
- description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
4068
- creator_organization_name: Mistral
4069
- access: limited
4070
- release_date: 2023-12-11
4531
+ - name: ibm/granite-3.3-8b-instruct
4532
+ display_name: IBM Granite 3.3 8B Instruct
4533
+ description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
4534
+ creator_organization_name: IBM
4535
+ access: open
4536
+ num_parameters: 8170000000
4537
+ release_date: 2025-04-16
4071
4538
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4539
+
4540
+ - name: ibm/granite-3.3-8b-instruct-with-guardian
4541
+ display_name: IBM Granite 3.3 8B Instruct (with guardian)
4542
+ description: IBM Granite 3.3 8B Instruct is an 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct)) This model was run with an additional safety filter using [Granite Guardian 3.2](https://www.ibm.com/granite/docs/models/guardian/).
4543
+ creator_organization_name: IBM
4544
+ access: open
4545
+ num_parameters: 8170000000
4546
+ release_date: 2025-04-16
4547
+ # Unfortunately this setup is not easily reproducible, so we mark it with DEPRECATED_MODEL_TAG
4548
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4549
+
4550
+ - name: ura-hcmut/ura-llama-2.1-8b
4551
+ display_name: URA-Llama 2.1 (8B)
4552
+ description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4553
+ creator_organization_name: URA
4554
+ access: open
4555
+ num_parameters: 8000000000
4556
+ release_date: 2024-08-04
4557
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4558
+
4559
+ - name: ura-hcmut/ura-llama-2-8b
4560
+ display_name: URA-Llama 2 (8B)
4561
+ description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4562
+ creator_organization_name: URA
4563
+ access: open
4564
+ num_parameters: 8000000000
4565
+ release_date: 2024-08-04
4566
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4567
+
4568
+ - name: ura-hcmut/ura-llama-7b
4569
+ display_name: URA-Llama 7B (7B)
4570
+ description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4571
+ creator_organization_name: URA
4572
+ access: open
4573
+ num_parameters: 7000000000
4574
+ release_date: 2023-10-10
4575
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4576
+
4577
+ - name: ura-hcmut/ura-llama-13b
4578
+ display_name: URA-Llama 13B (13B)
4579
+ description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4580
+ creator_organization_name: URA
4581
+ access: open
4582
+ num_parameters: 13000000000
4583
+ release_date: 2023-10-10
4584
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4585
+
4586
+ - name: ura-hcmut/ura-llama-70b
4587
+ display_name: URA-Llama 70B (70B)
4588
+ description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4589
+ creator_organization_name: URA
4590
+ access: open
4591
+ num_parameters: 70000000000
4592
+ release_date: 2023-10-10
4593
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4594
+
4595
+ - name: ura-hcmut/GemSUra-7B
4596
+ display_name: GemSUra 7B
4597
+ description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4598
+ creator_organization_name: URA
4599
+ access: open
4600
+ num_parameters: 7000000000
4601
+ release_date: 2024-03-12
4602
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4603
+
4604
+ - name: ura-hcmut/GemSUra-2B
4605
+ display_name: GemSUra 2B
4606
+ description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4607
+ creator_organization_name: URA
4608
+ access: open
4609
+ num_parameters: 2000000000
4610
+ release_date: 2024-03-12
4611
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4612
+
4613
+ - name: ura-hcmut/MixSUra
4614
+ display_name: MixSUra
4615
+ description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a mixture of experts model with 8 active experts.
4616
+ creator_organization_name: URA
4617
+ access: open
4618
+ num_parameters: 46700000000
4619
+ release_date: 2024-03-12
4620
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4621
+
4622
+ - name: vilm/vinallama-7b-chat
4623
+ display_name: VinaLLaMa
4624
+ description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4625
+ creator_organization_name: ViLM
4626
+ access: open
4627
+ num_parameters: 7000000000
4628
+ release_date: 2024-03-12
4629
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4630
+
4631
+ - name: vilm/vinallama-2.7b-chat
4632
+ display_name: VinaLLaMa 2.7B
4633
+ description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4634
+ creator_organization_name: ViLM
4635
+ access: open
4636
+ num_parameters: 2700000000
4637
+ release_date: 2024-03-12
4638
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4639
+
4640
+ - name: vilm/vietcuna-7b-v3
4641
+ display_name: VietCuna 7B (v3)
4642
+ description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4643
+ creator_organization_name: ViLM
4644
+ access: open
4645
+ num_parameters: 7000000000
4646
+ release_date: 2023-08-07
4647
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4648
+
4649
+ - name: vilm/vietcuna-3b-v2
4650
+ display_name: VietCuna 3B (v2)
4651
+ description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4652
+ creator_organization_name: ViLM
4653
+ access: open
4654
+ num_parameters: 3000000000
4655
+ release_date: 2023-07-15
4656
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4657
+
4658
+ - name: vilm/Quyen-v0.1
4659
+ display_name: Quyen (v0.1)
4660
+ description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4661
+ creator_organization_name: ViLM
4662
+ access: open
4663
+ num_parameters: 4000000000
4664
+ release_date: 2024-02-26
4665
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4666
+
4667
+ - name: vilm/Quyen-Plus-v0.1
4668
+ display_name: Quyen Plus (v0.1)
4669
+ description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4670
+ creator_organization_name: ViLM
4671
+ access: open
4672
+ num_parameters: 7000000000
4673
+ release_date: 2024-02-26
4674
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4675
+
4676
+ - name: vilm/Quyen-Pro-v0.1
4677
+ display_name: Quyen Pro (v0.1)
4678
+ description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4679
+ creator_organization_name: ViLM
4680
+ access: open
4681
+ num_parameters: 14000000000
4682
+ release_date: 2024-02-26
4683
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4684
+
4685
+ - name: vilm/Quyen-Pro-Max-v0.1
4686
+ display_name: Quyen Pro Max (v0.1)
4687
+ description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4688
+ creator_organization_name: ViLM
4689
+ access: open
4690
+ num_parameters: 72000000000
4691
+ release_date: 2024-02-26
4692
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4693
+
4694
+ - name: vilm/Quyen-Mini-v0.1
4695
+ display_name: Quyen Mini (v0.1)
4696
+ description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4697
+ creator_organization_name: ViLM
4698
+ access: open
4699
+ num_parameters: 1800000000
4700
+ release_date: 2024-02-26
4701
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4702
+
4703
+ - name: vilm/Quyen-SE-v0.1
4704
+ display_name: Quyen SE (v0.1)
4705
+ description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4706
+ creator_organization_name: ViLM
4707
+ access: open
4708
+ num_parameters: 500000000
4709
+ release_date: 2024-02-26
4710
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4711
+
4712
+ - name: Viet-Mistral/Vistral-7B-Chat
4713
+ display_name: Vistral 7B Chat
4714
+ description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4715
+ creator_organization_name: Viet-Mistral
4716
+ access: open
4717
+ num_parameters: 7000000000
4718
+ release_date: 2024-02-28
4719
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4720
+
4721
+ - name: vinai/PhoGPT-7B5-Instruct
4722
+ display_name: PhoGPT 7B5 Instruct
4723
+ description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4724
+ creator_organization_name: VinAI
4725
+ access: open
4726
+ num_parameters: 7500000000
4727
+ release_date: 2024-02-19
4728
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4729
+
4730
+ - name: vinai/PhoGPT-4B-Chat
4731
+ display_name: PhoGPT 4B Chat
4732
+ description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4733
+ creator_organization_name: VinAI
4734
+ access: open
4735
+ num_parameters: 4000000000
4736
+ release_date: 2024-04-02
4737
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4738
+
4739
+ - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
4740
+ display_name: Gemma-3 Gaia PT-BR 4b Instruct
4741
+ description: Gemma-3 Gaia PT-BR 4b Instruct is a model trained by CEIA-UFG for understanding and generating Brazilian Portuguese text.
4742
+ creator_organization_name: CEIA-UFG
4743
+ access: open
4744
+ num_parameters: 4000000000
4745
+ release_date: 2025-06-01
4746
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4747
+
4748
+ - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
4749
+ display_name: Bode 13B Alpaca PT-BR
4750
+ description: Bode is a language model (LLM) for Portuguese, based on LLaMA 2 and fine-tuned with the Alpaca dataset translated into Portuguese. Suitable for instruction, text generation, translation and tasks in Portuguese.
4751
+ creator_organization_name: Recogna NLP
4752
+ access: open
4753
+ num_parameters: 13000000000
4754
+ release_date: 2024-01-05
4755
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4756
+
4757
+ - name: 22h/cabrita_7b_pt_850000
4758
+ display_name: Cabrita PT-BR 7B
4759
+ description: Cabrita is an OpenLLaMA-based model, continuously trained in Portuguese (mC4-pt subset) for 850000 steps with efficient tokenization adapted to the language.
4760
+ creator_organization_name: 22h
4761
+ access: open
4762
+ num_parameters: 7000000000
4763
+ release_date: 2023-08-23
4764
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4765
+
4766
+ - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
4767
+ display_name: Gervásio PT-BR/PT-PT 7B Decoder
4768
+ description: Gervásio PT* is a 7B parameter decoder model, adapted from LLaMA 2 7B, trained for both Brazilian and European Portuguese. Fine-tuned with translated data from benchmarks such as GLUE and SuperGLUE.
4769
+ creator_organization_name: PORTULAN (University of Lisbon NLX)
4770
+ access: open
4771
+ num_parameters: 6740000000
4772
+ release_date: 2024-02-29
4773
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4774
+
4775
+ - name: TucanoBR/Tucano-2b4
4776
+ display_name: Tucano PT-BR 2b4
4777
+ description: Tucano is a series of decoder models based on LLaMA2, natively pre-trained in Portuguese using the GigaVerbo dataset (200B tokens), with the 2B model trained for 1.96M steps over 845h (515B tokens, 4 epochs).
4778
+ creator_organization_name: TucanoBR (University of Bonn)
4779
+ access: open
4780
+ num_parameters: 2444618240
4781
+ release_date: 2024-12-11
4782
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4783
+
4784
+ - name: nicholasKluge/TeenyTinyLlama-460m
4785
+ display_name: TeenyTinyLlama 460M PT-BR
4786
+ description: TeenyTinyLlama-460m is a lightweight and efficient model based on LLaMA2, trained exclusively on Brazilian Portuguese. It uses RoPE embeddings and SwiGLU activations, with a refined SentencePiece tokenizer and a low-resource optimized architecture.
4787
+ creator_organization_name: Nicholas Kluge.
4788
+ access: open
4789
+ num_parameters: 460000000
4790
+ release_date: 2024-01-30
4791
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4792
+