crfm-helm 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic.
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/METADATA +74 -53
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/RECORD +262 -182
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
- helm/benchmark/annotation/air_bench_annotator.py +2 -2
- helm/benchmark/annotation/bigcodebench_annotator.py +3 -3
- helm/benchmark/annotation/bird_sql_annotator.py +2 -2
- helm/benchmark/annotation/chw_care_plan_annotator.py +7 -12
- helm/benchmark/annotation/ehr_sql_annotator.py +2 -2
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +7 -7
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +12 -16
- helm/benchmark/annotation/omni_math_annotator.py +13 -14
- helm/benchmark/annotation/wildbench_annotator.py +9 -9
- helm/benchmark/executor.py +11 -12
- helm/benchmark/metrics/aci_bench_metrics.py +9 -29
- helm/benchmark/metrics/bias_word_lists.py +1 -1
- helm/benchmark/metrics/chw_care_plan_metrics.py +10 -30
- helm/benchmark/metrics/classification_metrics.py +3 -3
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/codeinsights_code_efficiency_metrics.py +186 -0
- helm/benchmark/metrics/codeinsights_code_evaluation_metrics.py +477 -0
- helm/benchmark/metrics/codeinsights_correct_code_metrics.py +366 -0
- helm/benchmark/metrics/codeinsights_edge_case_metrics.py +92 -0
- helm/benchmark/metrics/codeinsights_metric_specs.py +51 -0
- helm/benchmark/metrics/comet_metric.py +1 -1
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +2 -2
- helm/benchmark/metrics/copyright_metrics.py +1 -1
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +1 -1
- helm/benchmark/metrics/dischargeme_metrics.py +9 -29
- helm/benchmark/metrics/efficiency_metrics.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +1 -1
- helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
- helm/benchmark/metrics/ifeval_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +13 -2
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
- helm/benchmark/metrics/llm_jury_metrics.py +46 -0
- helm/benchmark/metrics/lmkt_metric_specs.py +12 -0
- helm/benchmark/metrics/lmkt_metrics.py +47 -0
- helm/benchmark/metrics/med_dialog_metrics.py +9 -29
- helm/benchmark/metrics/medalign_metrics.py +9 -29
- helm/benchmark/metrics/medi_qa_metrics.py +9 -29
- helm/benchmark/metrics/medication_qa_metrics.py +10 -30
- helm/benchmark/metrics/melt_bias_metric.py +234 -0
- helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
- helm/benchmark/metrics/melt_metric_specs.py +43 -0
- helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
- helm/benchmark/metrics/mental_health_metrics.py +9 -29
- helm/benchmark/metrics/metric_service.py +11 -11
- helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
- helm/benchmark/metrics/mimic_rrs_metrics.py +9 -29
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +9 -29
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +9 -29
- helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
- helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +9 -29
- helm/benchmark/metrics/summac/model_summac.py +2 -3
- helm/benchmark/metrics/summarization_metrics.py +2 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/toxicity_metrics.py +2 -2
- helm/benchmark/metrics/unitxt_metrics.py +3 -4
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
- helm/benchmark/metrics/vision_language/image_utils.py +2 -2
- helm/benchmark/model_deployment_registry.py +16 -26
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +43 -13
- helm/benchmark/presentation/run_display.py +13 -0
- helm/benchmark/presentation/schema.py +7 -1
- helm/benchmark/presentation/summarize.py +84 -61
- helm/benchmark/presentation/test_create_plots.py +4 -1
- helm/benchmark/reeval_run.py +3 -4
- helm/benchmark/reeval_runner.py +3 -3
- helm/benchmark/run.py +84 -73
- helm/benchmark/run_expander.py +12 -1
- helm/benchmark/run_spec_factory.py +7 -6
- helm/benchmark/run_specs/arabic_run_specs.py +73 -0
- helm/benchmark/run_specs/audio_run_specs.py +52 -8
- helm/benchmark/run_specs/bluex_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +0 -53
- helm/benchmark/run_specs/codeinsights_run_specs.py +192 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +20 -0
- helm/benchmark/run_specs/experimental_run_specs.py +31 -1
- helm/benchmark/run_specs/healthqa_br_run_specs.py +40 -0
- helm/benchmark/run_specs/heim_run_specs.py +3 -1
- helm/benchmark/run_specs/lmkt_run_specs.py +144 -0
- helm/benchmark/run_specs/long_context_run_specs.py +114 -15
- helm/benchmark/run_specs/medhelm_run_specs.py +146 -41
- helm/benchmark/run_specs/melt_run_specs.py +783 -0
- helm/benchmark/run_specs/multilingual_run_specs.py +50 -0
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +163 -0
- helm/benchmark/run_specs/vlm_run_specs.py +28 -0
- helm/benchmark/runner.py +5 -5
- helm/benchmark/scenarios/aci_bench_scenario.py +7 -1
- helm/benchmark/scenarios/alghafa_scenario.py +126 -0
- helm/benchmark/scenarios/arabic_mmlu_scenario.py +78 -0
- helm/benchmark/scenarios/aratrust_scenario.py +76 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +3 -1
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +5 -5
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +104 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +118 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +86 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +117 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +15 -1
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +1 -2
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +2 -2
- helm/benchmark/scenarios/bluex_scenario.py +66 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +14 -13
- helm/benchmark/scenarios/clear_scenario.py +11 -7
- helm/benchmark/scenarios/cleva_scenario.py +1 -1
- helm/benchmark/scenarios/codeinsights_code_efficiency_scenario.py +197 -0
- helm/benchmark/scenarios/codeinsights_correct_code_scenario.py +78 -0
- helm/benchmark/scenarios/codeinsights_edge_case_scenario.py +192 -0
- helm/benchmark/scenarios/codeinsights_student_coding_scenario.py +162 -0
- helm/benchmark/scenarios/codeinsights_student_mistake_scenario.py +188 -0
- helm/benchmark/scenarios/dischargeme_scenario.py +36 -21
- helm/benchmark/scenarios/ehr_sql_scenario.py +7 -1
- helm/benchmark/scenarios/ehrshot_scenario.py +28 -55
- helm/benchmark/scenarios/exams_multilingual_scenario.py +115 -0
- helm/benchmark/scenarios/grammar.py +2 -2
- helm/benchmark/scenarios/headqa_scenario.py +6 -1
- helm/benchmark/scenarios/healthqa_br_scenario.py +80 -0
- helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +90 -0
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
- helm/benchmark/scenarios/{infinite_bench_sum_scenario.py → infinite_bench_en_sum_scenario.py} +10 -13
- helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
- helm/benchmark/scenarios/lmkt_scenarios.py +288 -0
- helm/benchmark/scenarios/math_scenario.py +21 -20
- helm/benchmark/scenarios/med_dialog_scenario.py +6 -1
- helm/benchmark/scenarios/medalign_scenario.py +9 -3
- helm/benchmark/scenarios/medalign_scenario_helper.py +27 -130
- helm/benchmark/scenarios/medbullets_scenario.py +7 -2
- helm/benchmark/scenarios/medcalc_bench_scenario.py +4 -2
- helm/benchmark/scenarios/medec_scenario.py +6 -1
- helm/benchmark/scenarios/medhallu_scenario.py +7 -1
- helm/benchmark/scenarios/medi_qa_scenario.py +10 -4
- helm/benchmark/scenarios/medication_qa_scenario.py +7 -1
- helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
- helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
- helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
- helm/benchmark/scenarios/melt_scenarios.py +793 -0
- helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
- helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
- helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
- helm/benchmark/scenarios/mental_health_scenario.py +16 -5
- helm/benchmark/scenarios/mimic_bhc_scenario.py +13 -8
- helm/benchmark/scenarios/mimic_rrs_scenario.py +17 -8
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +14 -8
- helm/benchmark/scenarios/mmlu_pro_scenario.py +1 -1
- helm/benchmark/scenarios/mmmlu_scenario.py +85 -0
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +5 -2
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +3 -2
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +11 -5
- helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +6 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +18 -8
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +2 -2
- helm/benchmark/scenarios/ruler_qa_scenarios.py +2 -2
- helm/benchmark/scenarios/seahelm_scenario.py +2 -2
- helm/benchmark/scenarios/shc_bmt_scenario.py +12 -6
- helm/benchmark/scenarios/shc_cdi_scenario.py +11 -6
- helm/benchmark/scenarios/shc_conf_scenario.py +12 -6
- helm/benchmark/scenarios/shc_ent_scenario.py +11 -6
- helm/benchmark/scenarios/shc_gip_scenario.py +13 -5
- helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +12 -7
- helm/benchmark/scenarios/shc_sei_scenario.py +12 -7
- helm/benchmark/scenarios/shc_sequoia_scenario.py +13 -5
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +15 -8
- helm/benchmark/scenarios/test_alghafa_scenario.py +29 -0
- helm/benchmark/scenarios/test_aratrust_scenario.py +21 -0
- helm/benchmark/scenarios/test_bluex_scenario.py +59 -0
- helm/benchmark/scenarios/test_exams_multilingual_scenario.py +29 -0
- helm/benchmark/scenarios/test_healtha_br_scenario.py +57 -0
- helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
- helm/benchmark/scenarios/truthful_qa_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
- helm/benchmark/server.py +2 -1
- helm/benchmark/slurm_jobs.py +1 -2
- helm/benchmark/slurm_runner.py +8 -1
- helm/benchmark/static/schema_arabic.yaml +228 -0
- helm/benchmark/static/schema_audio.yaml +60 -49
- helm/benchmark/static/schema_classic.yaml +0 -17
- helm/benchmark/static/schema_enterprise.yaml +21 -0
- helm/benchmark/static/schema_long_context.yaml +81 -20
- helm/benchmark/static/schema_medhelm.yaml +272 -213
- helm/benchmark/static/schema_melt.yaml +1257 -0
- helm/benchmark/static/schema_slphelm.yaml +162 -0
- helm/benchmark/static/schema_vhelm.yaml +26 -26
- helm/benchmark/static/schema_video.yaml +219 -0
- helm/benchmark/static_build/assets/index-b9779128.css +1 -0
- helm/benchmark/static_build/assets/index-e439d5e1.js +10 -0
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
- helm/benchmark/static_build/assets/{tremor-9cefc3c5.js → tremor-38a10867.js} +1 -1
- helm/benchmark/static_build/index.html +4 -4
- helm/benchmark/window_services/encoder_decoder_window_service.py +3 -3
- helm/benchmark/window_services/image_generation/clip_window_service.py +1 -3
- helm/benchmark/window_services/test_utils.py +3 -4
- helm/benchmark/window_services/tokenizer_service.py +7 -8
- helm/clients/anthropic_client.py +69 -29
- helm/clients/audio_language/diva_llama_client.py +4 -2
- helm/clients/audio_language/qwen2_5_omni_client.py +209 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +8 -6
- helm/clients/audio_language/qwen_audiolm_client.py +4 -2
- helm/clients/audio_language/test.py +62 -0
- helm/clients/bedrock_client.py +3 -1
- helm/clients/client.py +7 -7
- helm/clients/grok_client.py +36 -0
- helm/clients/huggingface_client.py +42 -3
- helm/clients/huggingface_pipeline_client.py +138 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
- helm/clients/image_generation/dalle_mini/model/modeling.py +1 -1
- helm/clients/image_generation/dalle_mini/model/processor.py +1 -1
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
- helm/clients/openai_client.py +102 -55
- helm/clients/openai_responses_client.py +176 -0
- helm/clients/palmyra_client.py +2 -5
- helm/clients/reka_client.py +2 -2
- helm/clients/test_huggingface_client.py +3 -3
- helm/clients/together_client.py +31 -6
- helm/clients/vertexai_client.py +17 -9
- helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
- helm/clients/vision_language/huggingface_vlm_client.py +2 -2
- helm/clients/vision_language/idefics_client.py +6 -2
- helm/clients/vision_language/paligemma_client.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +66 -53
- helm/clients/vision_language/qwen_vlm_client.py +7 -5
- helm/clients/vllm_client.py +43 -7
- helm/clients/vllm_granite_thinking_client.py +56 -0
- helm/clients/writer_client.py +102 -0
- helm/common/context.py +80 -0
- helm/common/credentials_utils.py +5 -5
- helm/common/critique_request.py +0 -1
- helm/common/general.py +9 -2
- helm/common/hierarchical_logger.py +104 -12
- helm/common/local_context.py +140 -0
- helm/common/object_spec.py +23 -8
- helm/common/remote_context.py +61 -0
- helm/common/request.py +8 -0
- helm/common/test_logging.py +94 -0
- helm/config/model_deployments.yaml +995 -45
- helm/config/model_metadata.yaml +780 -59
- helm/config/tokenizer_configs.yaml +224 -3
- helm/proxy/cli.py +4 -2
- helm/proxy/critique/mechanical_turk_utils.py +1 -1
- helm/proxy/retry.py +5 -0
- helm/proxy/services/server_service.py +21 -85
- helm/tokenizers/grok_tokenizer.py +55 -0
- helm/tokenizers/huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_grok_tokenizer.py +33 -0
- helm/benchmark/metrics/numeracy_metrics.py +0 -72
- helm/benchmark/metrics/test_numeracy_metrics.py +0 -95
- helm/benchmark/scenarios/numeracy_scenario.py +0 -793
- helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +0 -46
- helm/benchmark/static_build/assets/index-262903c1.js +0 -10
- helm/benchmark/static_build/assets/index-42060d71.css +0 -1
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.7.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-3ddfcd65.png → medhelm-v1-overview-3ddfcd65.png} +0 -0
helm/config/model_metadata.yaml
CHANGED
|
@@ -276,6 +276,14 @@ models:
|
|
|
276
276
|
# Amazon Nova models
|
|
277
277
|
# References for Amazon Nova models:
|
|
278
278
|
# https://aws.amazon.com/ai/generative-ai/nova/
|
|
279
|
+
- name: amazon/nova-premier-v1:0
|
|
280
|
+
display_name: Amazon Nova Premier
|
|
281
|
+
description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
|
|
282
|
+
creator_organization_name: Amazon
|
|
283
|
+
access: limited
|
|
284
|
+
release_date: 2025-04-30
|
|
285
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
286
|
+
|
|
279
287
|
- name: amazon/nova-pro-v1:0
|
|
280
288
|
display_name: Amazon Nova Pro
|
|
281
289
|
description: Amazon Nova Pro Model
|
|
@@ -507,6 +515,46 @@ models:
|
|
|
507
515
|
release_date: 2025-02-24
|
|
508
516
|
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
509
517
|
|
|
518
|
+
- name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
|
|
519
|
+
display_name: Claude 3.7 Sonnet (20250219, extended thinking)
|
|
520
|
+
description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with 10k budget tokens.
|
|
521
|
+
creator_organization_name: Anthropic
|
|
522
|
+
access: limited
|
|
523
|
+
release_date: 2025-02-24
|
|
524
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
525
|
+
|
|
526
|
+
- name: anthropic/claude-sonnet-4-20250514
|
|
527
|
+
display_name: Claude 4 Sonnet (20250514)
|
|
528
|
+
description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
|
|
529
|
+
creator_organization_name: Anthropic
|
|
530
|
+
access: limited
|
|
531
|
+
release_date: 2025-05-14
|
|
532
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
533
|
+
|
|
534
|
+
- name: anthropic/claude-sonnet-4-20250514-thinking-10k
|
|
535
|
+
display_name: Claude 4 Sonnet (20250514, extended thinking)
|
|
536
|
+
description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
|
|
537
|
+
creator_organization_name: Anthropic
|
|
538
|
+
access: limited
|
|
539
|
+
release_date: 2025-05-14
|
|
540
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
541
|
+
|
|
542
|
+
- name: anthropic/claude-opus-4-20250514
|
|
543
|
+
display_name: Claude 4 Opus (20250514)
|
|
544
|
+
description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
|
|
545
|
+
creator_organization_name: Anthropic
|
|
546
|
+
access: limited
|
|
547
|
+
release_date: 2025-05-14
|
|
548
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
549
|
+
|
|
550
|
+
- name: anthropic/claude-opus-4-20250514-thinking-10k
|
|
551
|
+
display_name: Claude 4 Opus (20250514, extended thinking)
|
|
552
|
+
description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
|
|
553
|
+
creator_organization_name: Anthropic
|
|
554
|
+
access: limited
|
|
555
|
+
release_date: 2025-05-14
|
|
556
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
557
|
+
|
|
510
558
|
- name: anthropic/stanford-online-all-v4-s3
|
|
511
559
|
display_name: Anthropic-LM v4-s3 (52B)
|
|
512
560
|
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
@@ -868,7 +916,7 @@ models:
|
|
|
868
916
|
# NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
|
|
869
917
|
num_parameters: 685000000000
|
|
870
918
|
release_date: 2025-01-20
|
|
871
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
919
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
872
920
|
|
|
873
921
|
- name: deepseek-ai/deepseek-r1-hide-reasoning
|
|
874
922
|
display_name: DeepSeek R1 (hide reasoning)
|
|
@@ -878,8 +926,35 @@ models:
|
|
|
878
926
|
# NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
|
|
879
927
|
num_parameters: 685000000000
|
|
880
928
|
release_date: 2025-01-20
|
|
881
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
929
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
882
930
|
|
|
931
|
+
- name: deepseek-ai/deepseek-r1-0528
|
|
932
|
+
display_name: DeepSeek-R1-0528
|
|
933
|
+
description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948))
|
|
934
|
+
creator_organization_name: DeepSeek
|
|
935
|
+
access: open
|
|
936
|
+
num_parameters: 685000000000
|
|
937
|
+
release_date: 2025-05-28
|
|
938
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
939
|
+
|
|
940
|
+
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
|
941
|
+
display_name: DeepSeek-R1-Distill-Llama-8b
|
|
942
|
+
description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
|
|
943
|
+
creator_organization_name: DeepSeek
|
|
944
|
+
access: open
|
|
945
|
+
num_parameters: 8000000000
|
|
946
|
+
release_date: 2025-01-20
|
|
947
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
948
|
+
|
|
949
|
+
- name: deepseek-ai/deepseek-coder-6.7b-instruct
|
|
950
|
+
display_name: DeepSeek-Coder-6.7b-Instruct
|
|
951
|
+
description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
|
|
952
|
+
creator_organization_name: DeepSeek
|
|
953
|
+
access: open
|
|
954
|
+
num_parameters: 6740000000
|
|
955
|
+
release_date: 2025-01-20
|
|
956
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
957
|
+
|
|
883
958
|
# EleutherAI
|
|
884
959
|
- name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
|
|
885
960
|
display_name: GPT-J (6B)
|
|
@@ -1146,6 +1221,14 @@ models:
|
|
|
1146
1221
|
release_date: 2025-02-05
|
|
1147
1222
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1148
1223
|
|
|
1224
|
+
- name: google/gemini-2.0-flash-lite-001
|
|
1225
|
+
display_name: Gemini 2.0 Flash Lite
|
|
1226
|
+
description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1227
|
+
creator_organization_name: Google
|
|
1228
|
+
access: limited
|
|
1229
|
+
release_date: 2025-03-25
|
|
1230
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1231
|
+
|
|
1149
1232
|
- name: google/gemini-2.0-flash-thinking-exp-01-21
|
|
1150
1233
|
display_name: Gemini 2.0 Flash Thinking (01-21 preview)
|
|
1151
1234
|
description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
|
|
@@ -1162,6 +1245,70 @@ models:
|
|
|
1162
1245
|
release_date: 2025-02-05
|
|
1163
1246
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1164
1247
|
|
|
1248
|
+
- name: google/gemini-2.5-flash-lite-preview-06-17
|
|
1249
|
+
display_name: Gemini 2.5 Flash-Lite (06-17 preview)
|
|
1250
|
+
description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
|
|
1251
|
+
creator_organization_name: Google
|
|
1252
|
+
access: limited
|
|
1253
|
+
release_date: 2025-06-17
|
|
1254
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1255
|
+
|
|
1256
|
+
- name: google/gemini-2.5-flash-preview-04-17
|
|
1257
|
+
display_name: Gemini 2.5 Flash (04-17 preview)
|
|
1258
|
+
description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1259
|
+
creator_organization_name: Google
|
|
1260
|
+
access: limited
|
|
1261
|
+
release_date: 2025-04-17
|
|
1262
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1263
|
+
|
|
1264
|
+
- name: google/gemini-2.5-flash-preview-05-20
|
|
1265
|
+
display_name: Gemini 2.5 Flash (05-20 preview)
|
|
1266
|
+
description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1267
|
+
creator_organization_name: Google
|
|
1268
|
+
access: limited
|
|
1269
|
+
release_date: 2025-04-17
|
|
1270
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1271
|
+
|
|
1272
|
+
- name: google/gemini-2.5-flash
|
|
1273
|
+
display_name: Gemini 2.5 Flash
|
|
1274
|
+
description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1275
|
+
creator_organization_name: Google
|
|
1276
|
+
access: limited
|
|
1277
|
+
release_date: 2025-06-17
|
|
1278
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1279
|
+
|
|
1280
|
+
- name: google/gemini-2.5-pro-exp-03-25
|
|
1281
|
+
display_name: Gemini 2.5 Pro (03-25 experimental)
|
|
1282
|
+
description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1283
|
+
creator_organization_name: Google
|
|
1284
|
+
access: limited
|
|
1285
|
+
release_date: 2025-03-25
|
|
1286
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1287
|
+
|
|
1288
|
+
- name: google/gemini-2.5-pro-preview-03-25
|
|
1289
|
+
display_name: Gemini 2.5 Pro (03-25 preview)
|
|
1290
|
+
description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1291
|
+
creator_organization_name: Google
|
|
1292
|
+
access: limited
|
|
1293
|
+
release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
|
|
1294
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1295
|
+
|
|
1296
|
+
- name: google/gemini-2.5-pro-preview-05-06
|
|
1297
|
+
display_name: Gemini 2.5 Pro (05-06 preview)
|
|
1298
|
+
description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1299
|
+
creator_organization_name: Google
|
|
1300
|
+
access: limited
|
|
1301
|
+
release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
|
|
1302
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1303
|
+
|
|
1304
|
+
- name: google/gemini-2.5-pro
|
|
1305
|
+
display_name: Gemini 2.5 Pro
|
|
1306
|
+
description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1307
|
+
creator_organization_name: Google
|
|
1308
|
+
access: limited
|
|
1309
|
+
release_date: 2025-06-17
|
|
1310
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1311
|
+
|
|
1165
1312
|
- name: google/gemma-2b
|
|
1166
1313
|
display_name: Gemma (2B)
|
|
1167
1314
|
description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
|
|
@@ -1360,6 +1507,60 @@ models:
|
|
|
1360
1507
|
release_date: 2023-08-22
|
|
1361
1508
|
tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
1362
1509
|
|
|
1510
|
+
- name: huggingface/smollm2-135m
|
|
1511
|
+
display_name: SmolLM2 (135M)
|
|
1512
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1513
|
+
creator_organization_name: HuggingFace
|
|
1514
|
+
access: open
|
|
1515
|
+
num_parameters: 135000000
|
|
1516
|
+
release_date: 2024-10-31
|
|
1517
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1518
|
+
|
|
1519
|
+
- name: huggingface/smollm2-360m
|
|
1520
|
+
display_name: SmolLM2 (360M)
|
|
1521
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1522
|
+
creator_organization_name: HuggingFace
|
|
1523
|
+
access: open
|
|
1524
|
+
num_parameters: 362000000
|
|
1525
|
+
release_date: 2024-10-31
|
|
1526
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1527
|
+
|
|
1528
|
+
- name: huggingface/smollm2-1.7b
|
|
1529
|
+
display_name: SmolLM2 (1.7B)
|
|
1530
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1531
|
+
creator_organization_name: HuggingFace
|
|
1532
|
+
access: open
|
|
1533
|
+
num_parameters: 1710000000
|
|
1534
|
+
release_date: 2024-10-31
|
|
1535
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1536
|
+
|
|
1537
|
+
- name: huggingface/smollm2-135m-instruct
|
|
1538
|
+
display_name: SmolLM2 Instruct (135M)
|
|
1539
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1540
|
+
creator_organization_name: HuggingFace
|
|
1541
|
+
access: open
|
|
1542
|
+
num_parameters: 135000000
|
|
1543
|
+
release_date: 2024-10-31
|
|
1544
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1545
|
+
|
|
1546
|
+
- name: huggingface/smollm2-360m-instruct
|
|
1547
|
+
display_name: SmolLM2 Instruct (360M)
|
|
1548
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1549
|
+
creator_organization_name: HuggingFace
|
|
1550
|
+
access: open
|
|
1551
|
+
num_parameters: 362000000
|
|
1552
|
+
release_date: 2024-10-31
|
|
1553
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1554
|
+
|
|
1555
|
+
- name: huggingface/smollm2-1.7b-instruct
|
|
1556
|
+
display_name: SmolLM2 Instruct (1.7B)
|
|
1557
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1558
|
+
creator_organization_name: HuggingFace
|
|
1559
|
+
access: open
|
|
1560
|
+
num_parameters: 1710000000
|
|
1561
|
+
release_date: 2024-10-31
|
|
1562
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1563
|
+
|
|
1363
1564
|
## Text-to-Image Diffusion Models
|
|
1364
1565
|
- name: huggingface/dreamlike-diffusion-v1-0
|
|
1365
1566
|
display_name: Dreamlike Diffusion v1.0 (1B)
|
|
@@ -1573,6 +1774,16 @@ models:
|
|
|
1573
1774
|
release_date: 2023-06-22
|
|
1574
1775
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1575
1776
|
|
|
1777
|
+
# Marin Community
|
|
1778
|
+
- name: marin-community/marin-8b-instruct
|
|
1779
|
+
display_name: Marin 8B Instruct
|
|
1780
|
+
description: Marin 8B Instruct is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
|
|
1781
|
+
creator_organization_name: Marin Community
|
|
1782
|
+
access: open
|
|
1783
|
+
num_parameters: 8030000000
|
|
1784
|
+
release_date: 2025-05-15
|
|
1785
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1786
|
+
|
|
1576
1787
|
# Meta
|
|
1577
1788
|
- name: meta/opt-iml-175b # NOT SUPPORTED
|
|
1578
1789
|
display_name: OPT-IML (175B)
|
|
@@ -1875,6 +2086,24 @@ models:
|
|
|
1875
2086
|
release_date: 2024-12-06
|
|
1876
2087
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1877
2088
|
|
|
2089
|
+
- name: meta/llama-4-scout-17b-16e-instruct
|
|
2090
|
+
display_name: Llama 4 Scout (17Bx16E) Instruct
|
|
2091
|
+
description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
|
|
2092
|
+
creator_organization_name: Meta
|
|
2093
|
+
access: open
|
|
2094
|
+
num_parameters: 109000000000
|
|
2095
|
+
release_date: 2025-04-05
|
|
2096
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2097
|
+
|
|
2098
|
+
- name: meta/llama-4-maverick-17b-128e-instruct-fp8
|
|
2099
|
+
display_name: Llama 4 Maverick (17Bx128E) Instruct FP8
|
|
2100
|
+
description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
|
|
2101
|
+
creator_organization_name: Meta
|
|
2102
|
+
access: open
|
|
2103
|
+
num_parameters: 402000000000
|
|
2104
|
+
release_date: 2025-04-05
|
|
2105
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2106
|
+
|
|
1878
2107
|
- name: meta/llama-3-8b-chat
|
|
1879
2108
|
display_name: Llama 3 Instruct (8B)
|
|
1880
2109
|
description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
|
|
@@ -2150,6 +2379,42 @@ models:
|
|
|
2150
2379
|
release_date: 2024-04-17
|
|
2151
2380
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2152
2381
|
|
|
2382
|
+
- name: allenai/olmo-2-1124-7b-instruct
|
|
2383
|
+
display_name: OLMo 2 7B Instruct November 2024
|
|
2384
|
+
description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
|
|
2385
|
+
creator_organization_name: Allen Institute for AI
|
|
2386
|
+
access: open
|
|
2387
|
+
num_parameters: 7300000000
|
|
2388
|
+
release_date: 2024-11-26
|
|
2389
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2390
|
+
|
|
2391
|
+
- name: allenai/olmo-2-1124-13b-instruct
|
|
2392
|
+
display_name: OLMo 2 13B Instruct November 2024
|
|
2393
|
+
description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
|
|
2394
|
+
creator_organization_name: Allen Institute for AI
|
|
2395
|
+
access: open
|
|
2396
|
+
num_parameters: 13700000000
|
|
2397
|
+
release_date: 2024-11-26
|
|
2398
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2399
|
+
|
|
2400
|
+
- name: allenai/olmo-2-0325-32b-instruct
|
|
2401
|
+
display_name: OLMo 2 32B Instruct March 2025
|
|
2402
|
+
description: OLMo 2 32B Instruct March 2025 is trained up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B))
|
|
2403
|
+
creator_organization_name: Allen Institute for AI
|
|
2404
|
+
access: open
|
|
2405
|
+
num_parameters: 32200000000
|
|
2406
|
+
release_date: 2025-03-13
|
|
2407
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2408
|
+
|
|
2409
|
+
- name: allenai/olmoe-1b-7b-0125-instruct
|
|
2410
|
+
display_name: OLMoE 1B-7B Instruct January 2025
|
|
2411
|
+
description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060))
|
|
2412
|
+
creator_organization_name: Allen Institute for AI
|
|
2413
|
+
access: open
|
|
2414
|
+
num_parameters: 32200000000
|
|
2415
|
+
release_date: 2025-03-13
|
|
2416
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2417
|
+
|
|
2153
2418
|
# Mistral AI
|
|
2154
2419
|
- name: mistralai/mistral-7b-v0.1
|
|
2155
2420
|
display_name: Mistral v0.1 (7B)
|
|
@@ -2300,6 +2565,14 @@ models:
|
|
|
2300
2565
|
release_date: 2023-12-11
|
|
2301
2566
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2302
2567
|
|
|
2568
|
+
- name: mistralai/mistral-medium-2505
|
|
2569
|
+
display_name: Mistral Medium 3 (2505)
|
|
2570
|
+
description: Mistral Medium 3 (2505) is a language model that is intended to to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
|
|
2571
|
+
creator_organization_name: Mistral AI
|
|
2572
|
+
access: limited
|
|
2573
|
+
release_date: 2025-05-07
|
|
2574
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2575
|
+
|
|
2303
2576
|
- name: mistralai/mistral-large-2402
|
|
2304
2577
|
display_name: Mistral Large (2402)
|
|
2305
2578
|
description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
@@ -2351,6 +2624,15 @@ models:
|
|
|
2351
2624
|
release_date: 2024-11-18
|
|
2352
2625
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2353
2626
|
|
|
2627
|
+
# Moonshot AI
|
|
2628
|
+
- name: moonshotai/kimi-k2-instruct
|
|
2629
|
+
display_name: Kimi K2 Instruct
|
|
2630
|
+
description: Kimi K2 Instruct is a mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters trained with the Muon optimizer on 15.5T tokens. ([blog](https://moonshotai.github.io/Kimi-K2/))
|
|
2631
|
+
creator_organization_name: Moonshot AI
|
|
2632
|
+
access: open
|
|
2633
|
+
num_parameters: 1029173256720
|
|
2634
|
+
release_date: 2024-07-14 # Blog post has no date, so use the date from this news article https://www.cnbc.com/2025/07/14/alibaba-backed-moonshot-releases-kimi-k2-ai-rivaling-chatgpt-claude.html
|
|
2635
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2354
2636
|
|
|
2355
2637
|
# MosaicML
|
|
2356
2638
|
- name: mosaicml/mpt-7b
|
|
@@ -2746,6 +3028,30 @@ models:
|
|
|
2746
3028
|
release_date: 2024-07-18
|
|
2747
3029
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2748
3030
|
|
|
3031
|
+
- name: openai/gpt-4.1-2025-04-14
|
|
3032
|
+
display_name: GPT-4.1 (2025-04-14)
|
|
3033
|
+
description: GPT-4.1 (2025-04-14) is a multimdodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
|
|
3034
|
+
creator_organization_name: OpenAI
|
|
3035
|
+
access: limited
|
|
3036
|
+
release_date: 2025-04-14
|
|
3037
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3038
|
+
|
|
3039
|
+
- name: openai/gpt-4.1-mini-2025-04-14
|
|
3040
|
+
display_name: GPT-4.1 mini (2025-04-14)
|
|
3041
|
+
description: GPT-4.1 mini (2025-04-14) is a multimdodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
|
|
3042
|
+
creator_organization_name: OpenAI
|
|
3043
|
+
access: limited
|
|
3044
|
+
release_date: 2025-04-14
|
|
3045
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3046
|
+
|
|
3047
|
+
- name: openai/gpt-4.1-nano-2025-04-14
|
|
3048
|
+
display_name: GPT-4.1 nano (2025-04-14)
|
|
3049
|
+
description: GPT-4.1 nano (2025-04-14) is a multimdodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
|
|
3050
|
+
creator_organization_name: OpenAI
|
|
3051
|
+
access: limited
|
|
3052
|
+
release_date: 2025-04-14
|
|
3053
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3054
|
+
|
|
2749
3055
|
- name: openai/whisper-1_gpt-4o-2024-11-20
|
|
2750
3056
|
display_name: Whisper-1 + GPT-4o (2024-11-20)
|
|
2751
3057
|
description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
|
|
@@ -2754,6 +3060,22 @@ models:
|
|
|
2754
3060
|
release_date: 2024-11-20
|
|
2755
3061
|
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
2756
3062
|
|
|
3063
|
+
- name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
|
|
3064
|
+
display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
|
|
3065
|
+
description: Transcribes the text with GPT-4o Transcribe and then uses GPT-4o to generate a response.
|
|
3066
|
+
creator_organization_name: OpenAI
|
|
3067
|
+
access: limited
|
|
3068
|
+
release_date: 2025-03-20
|
|
3069
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
3070
|
+
|
|
3071
|
+
- name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
|
|
3072
|
+
display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
|
|
3073
|
+
description: Transcribes the text with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
|
|
3074
|
+
creator_organization_name: OpenAI
|
|
3075
|
+
access: limited
|
|
3076
|
+
release_date: 2025-03-20
|
|
3077
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
3078
|
+
|
|
2757
3079
|
- name: openai/gpt-4o-audio-preview-2024-10-01
|
|
2758
3080
|
display_name: GPT-4o Audio (Preview 2024-10-01)
|
|
2759
3081
|
description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using use audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
|
|
@@ -2807,6 +3129,30 @@ models:
|
|
|
2807
3129
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2808
3130
|
|
|
2809
3131
|
## o1 Models
|
|
3132
|
+
- name: openai/o1-pro-2025-03-19
|
|
3133
|
+
display_name: o1 pro (2025-03-19)
|
|
3134
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
|
|
3135
|
+
creator_organization_name: OpenAI
|
|
3136
|
+
access: limited
|
|
3137
|
+
release_date: 2025-03-19
|
|
3138
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3139
|
+
|
|
3140
|
+
- name: openai/o1-pro-2025-03-19-low-reasoning-effort
|
|
3141
|
+
display_name: o1 pro (2025-03-19, low reasoning effort)
|
|
3142
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter in is set to low.
|
|
3143
|
+
creator_organization_name: OpenAI
|
|
3144
|
+
access: limited
|
|
3145
|
+
release_date: 2025-03-19
|
|
3146
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3147
|
+
|
|
3148
|
+
- name: openai/o1-pro-2025-03-19-high-reasoning-effort
|
|
3149
|
+
display_name: o1 pro (2025-03-19, high reasoning effort)
|
|
3150
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter in is set to high.
|
|
3151
|
+
creator_organization_name: OpenAI
|
|
3152
|
+
access: limited
|
|
3153
|
+
release_date: 2025-03-19
|
|
3154
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3155
|
+
|
|
2810
3156
|
- name: openai/o1-2024-12-17
|
|
2811
3157
|
display_name: o1 (2024-12-17)
|
|
2812
3158
|
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
|
|
@@ -2871,6 +3217,62 @@ models:
|
|
|
2871
3217
|
release_date: 2025-01-31
|
|
2872
3218
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2873
3219
|
|
|
3220
|
+
- name: openai/o3-2025-04-16
|
|
3221
|
+
display_name: o3 (2025-04-16)
|
|
3222
|
+
description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3223
|
+
creator_organization_name: OpenAI
|
|
3224
|
+
access: limited
|
|
3225
|
+
release_date: 2025-04-16
|
|
3226
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3227
|
+
|
|
3228
|
+
- name: openai/o3-2025-04-16-low-reasoning-effort
|
|
3229
|
+
display_name: o3 (2025-04-16, low reasoning effort)
|
|
3230
|
+
description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3231
|
+
creator_organization_name: OpenAI
|
|
3232
|
+
access: limited
|
|
3233
|
+
release_date: 2025-04-16
|
|
3234
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3235
|
+
|
|
3236
|
+
- name: openai/o3-2025-04-16-high-reasoning-effort
|
|
3237
|
+
display_name: o3 (2025-04-16, high reasoning effort)
|
|
3238
|
+
description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3239
|
+
creator_organization_name: OpenAI
|
|
3240
|
+
access: limited
|
|
3241
|
+
release_date: 2025-04-16
|
|
3242
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3243
|
+
|
|
3244
|
+
- name: openai/o4-mini-2025-04-16
|
|
3245
|
+
display_name: o4-mini (2025-04-16)
|
|
3246
|
+
description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3247
|
+
creator_organization_name: OpenAI
|
|
3248
|
+
access: limited
|
|
3249
|
+
release_date: 2025-04-16
|
|
3250
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3251
|
+
|
|
3252
|
+
- name: openai/o4-mini-2025-04-16-low-reasoning-effort
|
|
3253
|
+
display_name: o4-mini (2025-04-16, low reasoning effort)
|
|
3254
|
+
description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3255
|
+
creator_organization_name: OpenAI
|
|
3256
|
+
access: limited
|
|
3257
|
+
release_date: 2025-04-16
|
|
3258
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3259
|
+
|
|
3260
|
+
- name: openai/o4-mini-2025-04-16-high-reasoning-effort
|
|
3261
|
+
display_name: o4-mini (2025-04-16, high reasoning effort)
|
|
3262
|
+
description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3263
|
+
creator_organization_name: OpenAI
|
|
3264
|
+
access: limited
|
|
3265
|
+
release_date: 2025-04-16
|
|
3266
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3267
|
+
|
|
3268
|
+
- name: openai/o3-pro-2025-06-10-high-reasoning-effort
|
|
3269
|
+
display_name: o3-pro (2025-06-10, high reasoning effort)
|
|
3270
|
+
description: o3-pro is an o-series model designed to think longer and provide the most reliable responses. ([blog post](https://help.openai.com/en/articles/9624314-model-release-notes))
|
|
3271
|
+
creator_organization_name: OpenAI
|
|
3272
|
+
access: limited
|
|
3273
|
+
release_date: 2025-06-10
|
|
3274
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3275
|
+
|
|
2874
3276
|
## Codex Models
|
|
2875
3277
|
# DEPRECATED: Codex models have been shut down on March 23 2023.
|
|
2876
3278
|
|
|
@@ -3139,6 +3541,14 @@ models:
|
|
|
3139
3541
|
release_date: 2024-09-19
|
|
3140
3542
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3141
3543
|
|
|
3544
|
+
- name: qwen/qwen3-235b-a22b-fp8-tput
|
|
3545
|
+
display_name: Qwen3 235B A22B FP8 Throughput
|
|
3546
|
+
description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)).
|
|
3547
|
+
creator_organization_name: Qwen
|
|
3548
|
+
access: open
|
|
3549
|
+
release_date: 2025-04-29
|
|
3550
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3551
|
+
|
|
3142
3552
|
- name: qwen/qwq-32b-preview
|
|
3143
3553
|
display_name: QwQ (32B Preview)
|
|
3144
3554
|
description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
|
|
@@ -3180,6 +3590,38 @@ models:
|
|
|
3180
3590
|
release_date: 2024-08-29
|
|
3181
3591
|
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3182
3592
|
|
|
3593
|
+
- name: qwen/qwen2.5-vl-3b-instruct
|
|
3594
|
+
display_name: Qwen2.5-VL Instruct (3B)
|
|
3595
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3596
|
+
creator_organization_name: Alibaba Group
|
|
3597
|
+
access: open
|
|
3598
|
+
release_date: 2025-01-26
|
|
3599
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3600
|
+
|
|
3601
|
+
- name: qwen/qwen2.5-vl-7b-instruct
|
|
3602
|
+
display_name: Qwen2.5-VL Instruct (7B)
|
|
3603
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3604
|
+
creator_organization_name: Alibaba Group
|
|
3605
|
+
access: open
|
|
3606
|
+
release_date: 2025-01-26
|
|
3607
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3608
|
+
|
|
3609
|
+
- name: qwen/qwen2.5-vl-32b-instruct
|
|
3610
|
+
display_name: Qwen2.5-VL Instruct (32B)
|
|
3611
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3612
|
+
creator_organization_name: Alibaba Group
|
|
3613
|
+
access: open
|
|
3614
|
+
release_date: 2025-01-26
|
|
3615
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3616
|
+
|
|
3617
|
+
- name: qwen/qwen2.5-vl-72b-instruct
|
|
3618
|
+
display_name: Qwen2.5-VL Instruct (72B)
|
|
3619
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3620
|
+
creator_organization_name: Alibaba Group
|
|
3621
|
+
access: open
|
|
3622
|
+
release_date: 2025-01-26
|
|
3623
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3624
|
+
|
|
3183
3625
|
- name: qwen/qwen-audio-chat
|
|
3184
3626
|
display_name: Qwen-Audio Chat
|
|
3185
3627
|
description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
|
|
@@ -3196,6 +3638,14 @@ models:
|
|
|
3196
3638
|
release_date: 2024-07-15
|
|
3197
3639
|
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3198
3640
|
|
|
3641
|
+
- name: qwen/qwen2.5-omni-7b
|
|
3642
|
+
display_name: Qwen2.5-Omni (7B)
|
|
3643
|
+
description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)).
|
|
3644
|
+
creator_organization_name: Alibaba Cloud
|
|
3645
|
+
access: open
|
|
3646
|
+
release_date: 2025-03-27
|
|
3647
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3648
|
+
|
|
3199
3649
|
# SAIL (Sea AI Lab)
|
|
3200
3650
|
- name: sail/sailor-7b
|
|
3201
3651
|
display_name: Sailor (7B)
|
|
@@ -3670,6 +4120,14 @@ models:
     release_date: 2024-09-12
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: writer/palmyra-x5
+    display_name: Palmyra X5
+    description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
+    creator_organization_name: Writer
+    access: limited
+    release_date: 2024-04-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: writer/palmyra-med-32k
     display_name: Palmyra-Med 32K (70B)
     description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
@@ -3680,11 +4138,10 @@ models:
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: writer/palmyra-med
-    display_name: Palmyra
-    description: Palmyra
+    display_name: Palmyra Med
+    description: Palmyra Med is a model intended for medical applications.
     creator_organization_name: Writer
     access: open
-    num_parameters: 70600000000
     release_date: 2024-07-31
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
@@ -3707,12 +4164,28 @@ models:
 
   # xAI
 
-  - name: xai/grok-beta
-    display_name: Grok Beta
-    description: Grok Beta is a model
+  - name: xai/grok-3-beta
+    display_name: Grok 3 Beta
+    description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
     creator_organization_name: xAI
-    access:
-    release_date:
+    access: limited
+    release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: xai/grok-3-mini-beta
+    display_name: Grok 3 mini Beta
+    description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
+    creator_organization_name: xAI
+    access: limited
+    release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: xai/grok-4-0709
+    display_name: Grok 4 (0709)
+    description: Grok 4 (0709) is a model that includes native tool use and real-time search integration. ([blog](https://x.ai/news/grok-4))
+    creator_organization_name: xAI
+    access: limited
+    release_date: 2025-07-09
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Yandex
@@ -3807,6 +4280,42 @@ models:
     release_date: 2024-09-10
     tags: [AUDIO_LANGUAGE_MODEL_TAG]
 
+
+  # Maritaca AI
+  - name: maritaca-ai/sabia-7b
+    display_name: Sabia 7B
+    description: Sabia 7B
+    creator_organization_name: MARITACA-AI
+    access: open
+    num_parameters: 6740000000
+    release_date: 2023-11-08
+    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabiazinho-3
+    display_name: Sabiazinho 3
+    description: Sabiazinho-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to july 2023.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2025-02-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabia-3
+    display_name: Sabía 3
+    description: Sabiá-3 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to july 2023.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: maritaca-ai/sabia-3.1-2025-05-08
+    display_name: Sabía 3.1
+    description: Sabiá-3.1 is a decoder-only language model designed for Portuguese text generation and understanding tasks. It supports a long context window of up to 128,000 tokens and is offered via API with scalable rate limits. The model is trained on diverse Portuguese corpora with knowledge up to August 2024.
+    creator_organization_name: Maritaca AI
+    access: limited
+    release_date: 2025-05-08
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
   # Granite - IBM
   # https://www.ibm.com/granite
   # https://github.com/ibm-granite/granite-3.0-language-models
@@ -3882,17 +4391,7 @@ models:
     num_parameters: 1380000000
     release: 2024-10-21
     tags: [TEXT_MODEL_TAG]
-
-  - name: maritaca-ai/sabia-7b
-    display_name: Sabia 7B
-    description: Sabia 7B
-    creator_organization_name: MARITACA-AI
-    access: open
-    num_parameters: 6740000000
-    release_date: 2023-11-08
-    tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  # Granite-3.1-8b-base
   - name: ibm-granite/granite-3.1-8b-base
     display_name: Granite 3.1 - 8B - Base
     description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3902,7 +4401,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-  # Granite-3.1-8b-instruct
   - name: ibm-granite/granite-3.1-8b-instruct
     display_name: Granite 3.1 - 8B - Instruct
     description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3912,7 +4410,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  # Granite-3.1-2b-instruct
   - name: ibm-granite/granite-3.1-2b-instruct
     display_name: Granite 3.1 - 2B - Instruct
     description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3922,7 +4419,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  # Granite-3.1-2b-base
   - name: ibm-granite/granite-3.1-2b-base
     display_name: Granite 3.1 - 2B - Base
     description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3932,7 +4428,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-  # Granite-3.1-3b-a800m-instruct
   - name: ibm-granite/granite-3.1-3b-a800m-instruct
     display_name: Granite 3.1 - 3B - A800M - Instruct
     description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3942,7 +4437,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  # Granite-3.1-3b-a800m-base
   - name: ibm-granite/granite-3.1-3b-a800m-base
     display_name: Granite 3.1 - 3B - A800M - Base
     description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3952,7 +4446,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-  # Granite-3.1-1b-a400m-instruct
   - name: ibm-granite/granite-3.1-1b-a400m-instruct
     display_name: Granite 3.1 - 1B - A400M - Instruct
     description: Granite-3.1-1B-A400M-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3962,7 +4455,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  # Granite-3.1-1b-a400m-base
   - name: ibm-granite/granite-3.1-1b-a400m-base
     display_name: Granite 3.1 - 1B - A400M - Base
     description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3972,27 +4464,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-  # DeepSeek-R1-Distill-Llama-3.1-8b
-  - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-    display_name: DeepSeek-R1-Distill-Llama-8b
-    description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 8000000000
-    release_date: 2025-01-20
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # deepseek-ai/deepseek-coder-6.7b-instruct
-  - name: deepseek-ai/deepseek-coder-6.7b-instruct
-    display_name: DeepSeek-Coder-6.7b-Instruct
-    description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 6740000000
-    release_date: 2025-01-20
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-  # WatsonX - IBM
   - name: ibm/granite-13b-instruct-v2
     display_name: Granite 13b instruct v2
     description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model.The following features were used in the design of the model Decoder-only model
@@ -4039,11 +4510,6 @@ models:
     release: 2024-6-18
     tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
 
-
-
-
-
-
   - name: ibm/granite-3.1-8b-instruct
     display_name: Granite 3.1 - 8B - Instruct
     description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -4062,10 +4528,265 @@ models:
     release_date: 2024-12-18
     tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
 
-  - name:
-    display_name:
-    description:
-    creator_organization_name:
-    access:
-
+  - name: ibm/granite-3.3-8b-instruct
+    display_name: IBM Granite 3.3 8B Instruct
+    description: IBM Granite 3.3 8B Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2025-04-16
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ibm/granite-3.3-8b-instruct-with-guardian
+    display_name: IBM Granite 3.3 8B Instruct (with guardian)
+    description: IBM Granite 3.3 8B Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct)) This model was run with an additional safety filter using [Granite Guardian 3.2](https://www.ibm.com/granite/docs/models/guardian/).
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2025-04-16
+    # Unfortunately this setup is not easily reproducible, so we mark it with DEPRECATED_MODEL_TAG
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-2.1-8b
+    display_name: URA-Llama 2.1 (8B)
+    description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-08-04
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-2-8b
+    display_name: URA-Llama 2 (8B)
+    description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-08-04
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-7b
+    display_name: URA-Llama 7B (7B)
+    description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-10-10
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-13b
+    display_name: URA-Llama 13B (13B)
+    description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 13000000000
+    release_date: 2023-10-10
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-70b
+    display_name: URA-Llama 70B (70B)
+    description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 70000000000
+    release_date: 2023-10-10
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/GemSUra-7B
+    display_name: GemSUra 7B
+    description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/GemSUra-2B
+    display_name: GemSUra 2B
+    description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 2000000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/MixSUra
+    display_name: MixSUra
+    description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a mixture of experts model with 8 active experts.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 46700000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vinallama-7b-chat
+    display_name: VinaLLaMa
+    description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vinallama-2.7b-chat
+    display_name: VinaLLaMa 2.7B
+    description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 2700000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vietcuna-7b-v3
+    display_name: VietCuna 7B (v3)
+    description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vietcuna-3b-v2
+    display_name: VietCuna 3B (v2)
+    description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 3000000000
+    release_date: 2023-07-15
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-v0.1
+    display_name: Quyen (v0.1)
+    description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 4000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Plus-v0.1
+    display_name: Quyen Plus (v0.1)
+    description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Pro-v0.1
+    display_name: Quyen Pro (v0.1)
+    description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Pro-Max-v0.1
+    display_name: Quyen Pro Max (v0.1)
+    description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Mini-v0.1
+    display_name: Quyen Mini (v0.1)
+    description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 1800000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-SE-v0.1
+    display_name: Quyen SE (v0.1)
+    description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 500000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: Viet-Mistral/Vistral-7B-Chat
+    display_name: Vistral 7B Chat
+    description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: Viet-Mistral
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vinai/PhoGPT-7B5-Instruct
+    display_name: PhoGPT 7B5 Instruct
+    description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: VinAI
+    access: open
+    num_parameters: 7500000000
+    release_date: 2024-02-19
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vinai/PhoGPT-4B-Chat
+    display_name: PhoGPT 4B Chat
+    description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: VinAI
+    access: open
+    num_parameters: 4000000000
+    release_date: 2024-04-02
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: CEIA-UFG/Gemma-3-Gaia-PT-BR-4b-it
+    display_name: Gemma-3 Gaia PT-BR 4b Instruct
+    description: Gemma-3 Gaia PT-BR 4b Instruct is a model trained by CEIA-UFG for understanding and generating Brazilian Portuguese text.
+    creator_organization_name: CEIA-UFG
+    access: open
+    num_parameters: 4000000000
+    release_date: 2025-06-01
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: recogna-nlp/bode-13b-alpaca-pt-br-no-peft
+    display_name: Bode 13B Alpaca PT-BR
+    description: Bode is a language model (LLM) for Portuguese, based on LLaMA 2 and fine-tuned with the Alpaca dataset translated into Portuguese. Suitable for instruction, text generation, translation and tasks in Portuguese.
+    creator_organization_name: Recogna NLP
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: 22h/cabrita_7b_pt_850000
+    display_name: Cabrita PT-BR 7B
+    description: Cabrita is an OpenLLaMA-based model, continuously trained in Portuguese (mC4-pt subset) for 850000 steps with efficient tokenization adapted to the language.
+    creator_organization_name: 22h
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: PORTULAN/gervasio-7b-portuguese-ptbr-decoder
+    display_name: Gervásio PT-BR/PT-PT 7B Decoder
+    description: Gervásio PT* is a 7B parameter decoder model, adapted from LLaMA27B, trained for both Brazilian and European Portuguese. Fine-tuned with translated data from benchmarks such as GLUE and SuperGLUE.
+    creator_organization_name: PORTULAN (University of Lisbon NLX)
+    access: open
+    num_parameters: 6740000000
+    release_date: 2024-02-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: TucanoBR/Tucano-2b4
+    display_name: Tucano PT-BR 2b4
+    description: Tucano is a series of decoder models based on LLaMA2, natively pre-trained in Portuguese using the GigaVerbo dataset (200B tokens), with the 2B model trained for 1.96M steps over 845h (515B tokens, 4 epochs).
+    creator_organization_name: TucanoBR (University of Bonn)
+    access: open
+    num_parameters: 2444618240
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: nicholasKluge/TeenyTinyLlama-460m
+    display_name: TeenyTinyLlama 460M PT-BR
+    description: TeenyTinyLlama-460m is a lightweight and efficient model based on LLaMA2, trained exclusively on Brazilian Portuguese. It uses RoPE embeddings and SwiGLU activations, with a refined SentencePiece tokenizer and a low-resource optimized architecture.
+    creator_organization_name: Nicholas Kluge.
+    access: open
+    num_parameters: 460000000
+    release_date: 2024-01-30
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
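Every entry added in this release follows the same YAML shape under the top-level models: key (name, display_name, description, creator_organization_name, access, an optional num_parameters, release_date, tags). As a rough sketch only, not something this diff specifies, a file with that shape could be loaded and sanity-checked as follows; the local path model_metadata.yaml and the use of PyYAML are assumptions for illustration:

# Minimal sketch: read a models YAML of the shape shown above and report entries
# that are missing fields. Assumes PyYAML is installed and a local model_metadata.yaml
# exists; neither is mandated by the diff itself.
import yaml

REQUIRED_FIELDS = {"name", "display_name", "description",
                   "creator_organization_name", "access", "release_date", "tags"}

def load_models(path="model_metadata.yaml"):
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    models = data.get("models", [])
    for entry in models:
        missing = REQUIRED_FIELDS - entry.keys()
        if missing:
            print(f"{entry.get('name', '<unnamed>')}: missing {sorted(missing)}")
    return models

if __name__ == "__main__":
    models = load_models()
    # Example use: count entries carrying the vision-language tag seen in the Qwen2.5-VL additions.
    vlms = [m["name"] for m in models if "VISION_LANGUAGE_MODEL_TAG" in m.get("tags", [])]
    print(f"{len(models)} models, {len(vlms)} tagged VISION_LANGUAGE_MODEL_TAG")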