crfm-helm 0.5.5__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic.
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.6.dist-info}/METADATA +27 -13
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.6.dist-info}/RECORD +203 -156
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.6.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
- helm/benchmark/annotation/air_bench_annotator.py +1 -1
- helm/benchmark/annotation/bigcodebench_annotator.py +3 -3
- helm/benchmark/annotation/bird_sql_annotator.py +2 -2
- helm/benchmark/annotation/chw_care_plan_annotator.py +7 -12
- helm/benchmark/annotation/ehr_sql_annotator.py +2 -2
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +7 -7
- helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +12 -16
- helm/benchmark/annotation/omni_math_annotator.py +13 -14
- helm/benchmark/annotation/wildbench_annotator.py +9 -9
- helm/benchmark/executor.py +11 -12
- helm/benchmark/metrics/aci_bench_metrics.py +9 -29
- helm/benchmark/metrics/bias_word_lists.py +1 -1
- helm/benchmark/metrics/chw_care_plan_metrics.py +10 -30
- helm/benchmark/metrics/classification_metrics.py +3 -3
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +9 -29
- helm/benchmark/metrics/efficiency_metrics.py +3 -3
- helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
- helm/benchmark/metrics/ifeval_metrics.py +2 -2
- helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
- helm/benchmark/metrics/llm_jury_metrics.py +46 -0
- helm/benchmark/metrics/med_dialog_metrics.py +9 -29
- helm/benchmark/metrics/medalign_metrics.py +9 -29
- helm/benchmark/metrics/medi_qa_metrics.py +9 -29
- helm/benchmark/metrics/medication_qa_metrics.py +10 -30
- helm/benchmark/metrics/melt_bias_metric.py +234 -0
- helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
- helm/benchmark/metrics/melt_metric_specs.py +43 -0
- helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
- helm/benchmark/metrics/mental_health_metrics.py +9 -29
- helm/benchmark/metrics/metric_service.py +11 -11
- helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
- helm/benchmark/metrics/mimic_rrs_metrics.py +9 -29
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +9 -29
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +9 -29
- helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
- helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +9 -29
- helm/benchmark/metrics/summac/model_summac.py +1 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/toxicity_metrics.py +2 -2
- helm/benchmark/metrics/unitxt_metrics.py +3 -4
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
- helm/benchmark/metrics/vision_language/image_utils.py +2 -2
- helm/benchmark/model_deployment_registry.py +6 -8
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +33 -12
- helm/benchmark/presentation/run_display.py +13 -0
- helm/benchmark/presentation/schema.py +2 -1
- helm/benchmark/presentation/summarize.py +76 -59
- helm/benchmark/reeval_run.py +3 -4
- helm/benchmark/reeval_runner.py +3 -3
- helm/benchmark/run.py +78 -73
- helm/benchmark/run_expander.py +12 -1
- helm/benchmark/run_spec_factory.py +7 -6
- helm/benchmark/run_specs/audio_run_specs.py +52 -8
- helm/benchmark/run_specs/enterprise_run_specs.py +20 -0
- helm/benchmark/run_specs/experimental_run_specs.py +31 -1
- helm/benchmark/run_specs/long_context_run_specs.py +67 -15
- helm/benchmark/run_specs/medhelm_run_specs.py +146 -41
- helm/benchmark/run_specs/melt_run_specs.py +783 -0
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +169 -0
- helm/benchmark/run_specs/vlm_run_specs.py +28 -0
- helm/benchmark/runner.py +5 -5
- helm/benchmark/scenarios/aci_bench_scenario.py +7 -1
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +3 -1
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +5 -5
- helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +103 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +110 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +78 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +109 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +15 -1
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +1 -2
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +2 -2
- helm/benchmark/scenarios/chw_care_plan_scenario.py +14 -13
- helm/benchmark/scenarios/clear_scenario.py +11 -7
- helm/benchmark/scenarios/dischargeme_scenario.py +36 -21
- helm/benchmark/scenarios/ehr_sql_scenario.py +7 -1
- helm/benchmark/scenarios/ehrshot_scenario.py +28 -55
- helm/benchmark/scenarios/grammar.py +2 -2
- helm/benchmark/scenarios/headqa_scenario.py +6 -1
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
- helm/benchmark/scenarios/{infinite_bench_sum_scenario.py → infinite_bench_en_sum_scenario.py} +10 -13
- helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +6 -1
- helm/benchmark/scenarios/medalign_scenario.py +9 -3
- helm/benchmark/scenarios/medalign_scenario_helper.py +8 -5
- helm/benchmark/scenarios/medbullets_scenario.py +7 -2
- helm/benchmark/scenarios/medcalc_bench_scenario.py +4 -2
- helm/benchmark/scenarios/medec_scenario.py +6 -1
- helm/benchmark/scenarios/medhallu_scenario.py +7 -1
- helm/benchmark/scenarios/medi_qa_scenario.py +10 -4
- helm/benchmark/scenarios/medication_qa_scenario.py +7 -1
- helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
- helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
- helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
- helm/benchmark/scenarios/melt_scenarios.py +793 -0
- helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
- helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
- helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
- helm/benchmark/scenarios/mental_health_scenario.py +16 -5
- helm/benchmark/scenarios/mimic_bhc_scenario.py +12 -7
- helm/benchmark/scenarios/mimic_rrs_scenario.py +17 -8
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +14 -8
- helm/benchmark/scenarios/mmlu_pro_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +5 -2
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +3 -2
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +11 -5
- helm/benchmark/scenarios/numeracy_scenario.py +2 -1
- helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
- helm/benchmark/scenarios/pubmed_qa_scenario.py +6 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +18 -8
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +2 -2
- helm/benchmark/scenarios/ruler_qa_scenarios.py +2 -2
- helm/benchmark/scenarios/shc_bmt_scenario.py +12 -6
- helm/benchmark/scenarios/shc_cdi_scenario.py +11 -6
- helm/benchmark/scenarios/shc_conf_scenario.py +12 -6
- helm/benchmark/scenarios/shc_ent_scenario.py +11 -6
- helm/benchmark/scenarios/shc_gip_scenario.py +13 -5
- helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +12 -7
- helm/benchmark/scenarios/shc_sei_scenario.py +12 -7
- helm/benchmark/scenarios/shc_sequoia_scenario.py +13 -5
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +15 -8
- helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
- helm/benchmark/scenarios/truthful_qa_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
- helm/benchmark/server.py +2 -1
- helm/benchmark/static/schema_audio.yaml +60 -49
- helm/benchmark/static/schema_enterprise.yaml +21 -0
- helm/benchmark/static/schema_long_context.yaml +63 -20
- helm/benchmark/static/schema_medhelm.yaml +272 -213
- helm/benchmark/static/schema_melt.yaml +1257 -0
- helm/benchmark/static/schema_slphelm.yaml +162 -0
- helm/benchmark/static/schema_vhelm.yaml +26 -26
- helm/benchmark/static/schema_video.yaml +219 -0
- helm/benchmark/static_build/assets/index-94295e78.js +10 -0
- helm/benchmark/static_build/assets/index-b9779128.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
- helm/benchmark/static_build/assets/{tremor-9cefc3c5.js → tremor-38a10867.js} +1 -1
- helm/benchmark/static_build/index.html +4 -4
- helm/benchmark/window_services/encoder_decoder_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +3 -4
- helm/benchmark/window_services/tokenizer_service.py +7 -8
- helm/clients/anthropic_client.py +69 -29
- helm/clients/audio_language/diva_llama_client.py +4 -2
- helm/clients/audio_language/qwen2_5_omni_client.py +197 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +8 -6
- helm/clients/audio_language/qwen_audiolm_client.py +4 -2
- helm/clients/audio_language/test.py +62 -0
- helm/clients/bedrock_client.py +3 -1
- helm/clients/client.py +7 -7
- helm/clients/grok_client.py +36 -0
- helm/clients/huggingface_client.py +42 -3
- helm/clients/huggingface_pipeline_client.py +138 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
- helm/clients/image_generation/dalle_mini/model/modeling.py +1 -1
- helm/clients/image_generation/dalle_mini/model/processor.py +1 -1
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
- helm/clients/openai_client.py +100 -54
- helm/clients/openai_responses_client.py +174 -0
- helm/clients/palmyra_client.py +2 -5
- helm/clients/reka_client.py +2 -2
- helm/clients/together_client.py +31 -4
- helm/clients/vertexai_client.py +6 -0
- helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
- helm/clients/vision_language/huggingface_vlm_client.py +2 -2
- helm/clients/vision_language/idefics_client.py +6 -2
- helm/clients/vision_language/paligemma_client.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +66 -53
- helm/clients/vision_language/qwen_vlm_client.py +7 -5
- helm/clients/writer_client.py +102 -0
- helm/common/context.py +80 -0
- helm/common/credentials_utils.py +5 -5
- helm/common/general.py +9 -2
- helm/common/hierarchical_logger.py +46 -3
- helm/common/local_context.py +140 -0
- helm/common/remote_context.py +61 -0
- helm/common/request.py +8 -0
- helm/config/model_deployments.yaml +864 -193
- helm/config/model_metadata.yaml +667 -53
- helm/config/tokenizer_configs.yaml +144 -3
- helm/proxy/cli.py +3 -1
- helm/proxy/critique/mechanical_turk_utils.py +1 -1
- helm/proxy/services/server_service.py +21 -85
- helm/tokenizers/grok_tokenizer.py +53 -0
- helm/tokenizers/huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_grok_tokenizer.py +33 -0
- helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +0 -46
- helm/benchmark/static_build/assets/index-262903c1.js +0 -10
- helm/benchmark/static_build/assets/index-42060d71.css +0 -1
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.6.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.6.dist-info}/licenses/LICENSE +0 -0
- {crfm_helm-0.5.5.dist-info → crfm_helm-0.5.6.dist-info}/top_level.txt +0 -0
- /helm/benchmark/static_build/assets/{medhelm-overview-3ddfcd65.png → medhelm-v1-overview-3ddfcd65.png} +0 -0
helm/config/model_metadata.yaml
CHANGED
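Every entry in this file follows the same schema: `name`, `display_name`, `description`, `creator_organization_name`, `access`, an optional `num_parameters`, `release_date`, and `tags`. As a reading aid for the hunks below, here is one of the entries added in this release (`amazon/nova-premier-v1:0`) rendered as standalone YAML; the two-space list indentation is assumed from HELM's usual config style:

```yaml
models:
  - name: amazon/nova-premier-v1:0
    display_name: Amazon Nova Premier
    description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
    creator_organization_name: Amazon
    access: limited
    release_date: 2025-04-30
    tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
```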
|
@@ -276,6 +276,14 @@ models:
|
|
|
276
276
|
# Amazon Nova models
|
|
277
277
|
# References for Amazon Nova models:
|
|
278
278
|
# https://aws.amazon.com/ai/generative-ai/nova/
|
|
279
|
+
- name: amazon/nova-premier-v1:0
|
|
280
|
+
display_name: Amazon Nova Premier
|
|
281
|
+
description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
|
|
282
|
+
creator_organization_name: Amazon
|
|
283
|
+
access: limited
|
|
284
|
+
release_date: 2025-04-30
|
|
285
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
286
|
+
|
|
279
287
|
- name: amazon/nova-pro-v1:0
|
|
280
288
|
display_name: Amazon Nova Pro
|
|
281
289
|
description: Amazon Nova Pro Model
|
|
@@ -507,6 +515,46 @@ models:
|
|
|
507
515
|
release_date: 2025-02-24
|
|
508
516
|
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
509
517
|
|
|
518
|
+
- name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
|
|
519
|
+
display_name: Claude 3.7 Sonnet (20250219, extended thinking)
|
|
520
|
+
description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with 10k budget tokens.
|
|
521
|
+
creator_organization_name: Anthropic
|
|
522
|
+
access: limited
|
|
523
|
+
release_date: 2025-02-24
|
|
524
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
525
|
+
|
|
526
|
+
- name: anthropic/claude-sonnet-4-20250514
|
|
527
|
+
display_name: Claude 4 Sonnet (20250514)
|
|
528
|
+
description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
|
|
529
|
+
creator_organization_name: Anthropic
|
|
530
|
+
access: limited
|
|
531
|
+
release_date: 2025-05-14
|
|
532
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
533
|
+
|
|
534
|
+
- name: anthropic/claude-sonnet-4-20250514-thinking-10k
|
|
535
|
+
display_name: Claude 4 Sonnet (20250514, extended thinking)
|
|
536
|
+
description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
|
|
537
|
+
creator_organization_name: Anthropic
|
|
538
|
+
access: limited
|
|
539
|
+
release_date: 2025-05-14
|
|
540
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
541
|
+
|
|
542
|
+
- name: anthropic/claude-opus-4-20250514
|
|
543
|
+
display_name: Claude 4 Opus (20250514)
|
|
544
|
+
description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
|
|
545
|
+
creator_organization_name: Anthropic
|
|
546
|
+
access: limited
|
|
547
|
+
release_date: 2025-05-14
|
|
548
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
549
|
+
|
|
550
|
+
- name: anthropic/claude-opus-4-20250514-thinking-10k
|
|
551
|
+
display_name: Claude 4 Opus (20250514, extended thinking)
|
|
552
|
+
description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
|
|
553
|
+
creator_organization_name: Anthropic
|
|
554
|
+
access: limited
|
|
555
|
+
release_date: 2025-05-14
|
|
556
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
557
|
+
|
|
510
558
|
- name: anthropic/stanford-online-all-v4-s3
|
|
511
559
|
display_name: Anthropic-LM v4-s3 (52B)
|
|
512
560
|
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
@@ -868,7 +916,7 @@ models:
|
|
|
868
916
|
# NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
|
|
869
917
|
num_parameters: 685000000000
|
|
870
918
|
release_date: 2025-01-20
|
|
871
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
919
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
872
920
|
|
|
873
921
|
- name: deepseek-ai/deepseek-r1-hide-reasoning
|
|
874
922
|
display_name: DeepSeek R1 (hide reasoning)
|
|
@@ -878,8 +926,35 @@ models:
|
|
|
878
926
|
# NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
|
|
879
927
|
num_parameters: 685000000000
|
|
880
928
|
release_date: 2025-01-20
|
|
881
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
929
|
+
tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
882
930
|
|
|
931
|
+
- name: deepseek-ai/deepseek-r1-0528
|
|
932
|
+
display_name: DeepSeek-R1-0528
|
|
933
|
+
description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948))
|
|
934
|
+
creator_organization_name: DeepSeek
|
|
935
|
+
access: open
|
|
936
|
+
num_parameters: 685000000000
|
|
937
|
+
release_date: 2025-05-28
|
|
938
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
939
|
+
|
|
940
|
+
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
|
941
|
+
display_name: DeepSeek-R1-Distill-Llama-8b
|
|
942
|
+
description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
|
|
943
|
+
creator_organization_name: DeepSeek
|
|
944
|
+
access: open
|
|
945
|
+
num_parameters: 8000000000
|
|
946
|
+
release_date: 2025-01-20
|
|
947
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
948
|
+
|
|
949
|
+
- name: deepseek-ai/deepseek-coder-6.7b-instruct
|
|
950
|
+
display_name: DeepSeek-Coder-6.7b-Instruct
|
|
951
|
+
description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
|
|
952
|
+
creator_organization_name: DeepSeek
|
|
953
|
+
access: open
|
|
954
|
+
num_parameters: 6740000000
|
|
955
|
+
release_date: 2025-01-20
|
|
956
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
957
|
+
|
|
883
958
|
# EleutherAI
|
|
884
959
|
- name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
|
|
885
960
|
display_name: GPT-J (6B)
|
|
@@ -1146,6 +1221,14 @@ models:
|
|
|
1146
1221
|
release_date: 2025-02-05
|
|
1147
1222
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1148
1223
|
|
|
1224
|
+
- name: google/gemini-2.0-flash-lite-001
|
|
1225
|
+
display_name: Gemini 2.0 Flash Lite
|
|
1226
|
+
description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1227
|
+
creator_organization_name: Google
|
|
1228
|
+
access: limited
|
|
1229
|
+
release_date: 2025-03-25
|
|
1230
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1231
|
+
|
|
1149
1232
|
- name: google/gemini-2.0-flash-thinking-exp-01-21
|
|
1150
1233
|
display_name: Gemini 2.0 Flash Thinking (01-21 preview)
|
|
1151
1234
|
description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
|
|
@@ -1162,6 +1245,70 @@ models:
|
|
|
1162
1245
|
release_date: 2025-02-05
|
|
1163
1246
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1164
1247
|
|
|
1248
|
+
- name: google/gemini-2.5-flash-lite-preview-06-17
|
|
1249
|
+
display_name: Gemini 2.5 Flash-Lite (06-17 preview)
|
|
1250
|
+
description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
|
|
1251
|
+
creator_organization_name: Google
|
|
1252
|
+
access: limited
|
|
1253
|
+
release_date: 2025-06-17
|
|
1254
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1255
|
+
|
|
1256
|
+
- name: google/gemini-2.5-flash-preview-04-17
|
|
1257
|
+
display_name: Gemini 2.5 Flash (04-17 preview)
|
|
1258
|
+
description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1259
|
+
creator_organization_name: Google
|
|
1260
|
+
access: limited
|
|
1261
|
+
release_date: 2025-04-17
|
|
1262
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1263
|
+
|
|
1264
|
+
- name: google/gemini-2.5-flash-preview-05-20
|
|
1265
|
+
display_name: Gemini 2.5 Flash (05-20 preview)
|
|
1266
|
+
description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1267
|
+
creator_organization_name: Google
|
|
1268
|
+
access: limited
|
|
1269
|
+
release_date: 2025-04-17
|
|
1270
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1271
|
+
|
|
1272
|
+
- name: google/gemini-2.5-flash
|
|
1273
|
+
display_name: Gemini 2.5 Flash
|
|
1274
|
+
description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1275
|
+
creator_organization_name: Google
|
|
1276
|
+
access: limited
|
|
1277
|
+
release_date: 2025-06-17
|
|
1278
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1279
|
+
|
|
1280
|
+
- name: google/gemini-2.5-pro-exp-03-25
|
|
1281
|
+
display_name: Gemini 2.5 Pro (03-25 experimental)
|
|
1282
|
+
description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1283
|
+
creator_organization_name: Google
|
|
1284
|
+
access: limited
|
|
1285
|
+
release_date: 2025-03-25
|
|
1286
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1287
|
+
|
|
1288
|
+
- name: google/gemini-2.5-pro-preview-03-25
|
|
1289
|
+
display_name: Gemini 2.5 Pro (03-25 preview)
|
|
1290
|
+
description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1291
|
+
creator_organization_name: Google
|
|
1292
|
+
access: limited
|
|
1293
|
+
release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
|
|
1294
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1295
|
+
|
|
1296
|
+
- name: google/gemini-2.5-pro-preview-05-06
|
|
1297
|
+
display_name: Gemini 2.5 Pro (05-06 preview)
|
|
1298
|
+
description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1299
|
+
creator_organization_name: Google
|
|
1300
|
+
access: limited
|
|
1301
|
+
release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
|
|
1302
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1303
|
+
|
|
1304
|
+
- name: google/gemini-2.5-pro
|
|
1305
|
+
display_name: Gemini 2.5 Pro
|
|
1306
|
+
description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1307
|
+
creator_organization_name: Google
|
|
1308
|
+
access: limited
|
|
1309
|
+
release_date: 2025-06-17
|
|
1310
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1311
|
+
|
|
1165
1312
|
- name: google/gemma-2b
|
|
1166
1313
|
display_name: Gemma (2B)
|
|
1167
1314
|
description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
|
|
@@ -1360,6 +1507,60 @@ models:
|
|
|
1360
1507
|
release_date: 2023-08-22
|
|
1361
1508
|
tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
1362
1509
|
|
|
1510
|
+
- name: huggingface/smollm2-135m
|
|
1511
|
+
display_name: SmolLM2 (135M)
|
|
1512
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1513
|
+
creator_organization_name: HuggingFace
|
|
1514
|
+
access: open
|
|
1515
|
+
num_parameters: 135000000
|
|
1516
|
+
release_date: 2024-10-31
|
|
1517
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1518
|
+
|
|
1519
|
+
- name: huggingface/smollm2-360m
|
|
1520
|
+
display_name: SmolLM2 (360M)
|
|
1521
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1522
|
+
creator_organization_name: HuggingFace
|
|
1523
|
+
access: open
|
|
1524
|
+
num_parameters: 362000000
|
|
1525
|
+
release_date: 2024-10-31
|
|
1526
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1527
|
+
|
|
1528
|
+
- name: huggingface/smollm2-1.7b
|
|
1529
|
+
display_name: SmolLM2 (1.7B)
|
|
1530
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1531
|
+
creator_organization_name: HuggingFace
|
|
1532
|
+
access: open
|
|
1533
|
+
num_parameters: 1710000000
|
|
1534
|
+
release_date: 2024-10-31
|
|
1535
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1536
|
+
|
|
1537
|
+
- name: huggingface/smollm2-135m-instruct
|
|
1538
|
+
display_name: SmolLM2 Instruct (135M)
|
|
1539
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1540
|
+
creator_organization_name: HuggingFace
|
|
1541
|
+
access: open
|
|
1542
|
+
num_parameters: 135000000
|
|
1543
|
+
release_date: 2024-10-31
|
|
1544
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1545
|
+
|
|
1546
|
+
- name: huggingface/smollm2-360m-instruct
|
|
1547
|
+
display_name: SmolLM2 Instruct (360M)
|
|
1548
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1549
|
+
creator_organization_name: HuggingFace
|
|
1550
|
+
access: open
|
|
1551
|
+
num_parameters: 362000000
|
|
1552
|
+
release_date: 2024-10-31
|
|
1553
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1554
|
+
|
|
1555
|
+
- name: huggingface/smollm2-1.7b-instruct
|
|
1556
|
+
display_name: SmolLM2 Instruct (1.7B)
|
|
1557
|
+
description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
|
|
1558
|
+
creator_organization_name: HuggingFace
|
|
1559
|
+
access: open
|
|
1560
|
+
num_parameters: 1710000000
|
|
1561
|
+
release_date: 2024-10-31
|
|
1562
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1563
|
+
|
|
1363
1564
|
## Text-to-Image Diffusion Models
|
|
1364
1565
|
- name: huggingface/dreamlike-diffusion-v1-0
|
|
1365
1566
|
display_name: Dreamlike Diffusion v1.0 (1B)
|
|
@@ -1573,6 +1774,16 @@ models:
|
|
|
1573
1774
|
release_date: 2023-06-22
|
|
1574
1775
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1575
1776
|
|
|
1777
|
+
# Marin Community
|
|
1778
|
+
- name: marin-community/marin-8b-instruct
|
|
1779
|
+
display_name: Marin 8B Instruct
|
|
1780
|
+
description: Marin 8B Instruct is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
|
|
1781
|
+
creator_organization_name: Marin Community
|
|
1782
|
+
access: open
|
|
1783
|
+
num_parameters: 8030000000
|
|
1784
|
+
release_date: 2025-05-15
|
|
1785
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1786
|
+
|
|
1576
1787
|
# Meta
|
|
1577
1788
|
- name: meta/opt-iml-175b # NOT SUPPORTED
|
|
1578
1789
|
display_name: OPT-IML (175B)
|
|
@@ -1875,6 +2086,24 @@ models:
|
|
|
1875
2086
|
release_date: 2024-12-06
|
|
1876
2087
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1877
2088
|
|
|
2089
|
+
- name: meta/llama-4-scout-17b-16e-instruct
|
|
2090
|
+
display_name: Llama 4 Scout (17Bx16E) Instruct
|
|
2091
|
+
description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
|
|
2092
|
+
creator_organization_name: Meta
|
|
2093
|
+
access: open
|
|
2094
|
+
num_parameters: 109000000000
|
|
2095
|
+
release_date: 2025-04-05
|
|
2096
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2097
|
+
|
|
2098
|
+
- name: meta/llama-4-maverick-17b-128e-instruct-fp8
|
|
2099
|
+
display_name: Llama 4 Maverick (17Bx128E) Instruct FP8
|
|
2100
|
+
description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection of models are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
|
|
2101
|
+
creator_organization_name: Meta
|
|
2102
|
+
access: open
|
|
2103
|
+
num_parameters: 402000000000
|
|
2104
|
+
release_date: 2025-04-05
|
|
2105
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2106
|
+
|
|
1878
2107
|
- name: meta/llama-3-8b-chat
|
|
1879
2108
|
display_name: Llama 3 Instruct (8B)
|
|
1880
2109
|
description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
|
|
@@ -2150,6 +2379,42 @@ models:
|
|
|
2150
2379
|
release_date: 2024-04-17
|
|
2151
2380
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2152
2381
|
|
|
2382
|
+
- name: allenai/olmo-2-1124-7b-instruct
|
|
2383
|
+
display_name: OLMo 2 7B Instruct November 2024
|
|
2384
|
+
description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
|
|
2385
|
+
creator_organization_name: Allen Institute for AI
|
|
2386
|
+
access: open
|
|
2387
|
+
num_parameters: 7300000000
|
|
2388
|
+
release_date: 2024-11-26
|
|
2389
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2390
|
+
|
|
2391
|
+
- name: allenai/olmo-2-1124-13b-instruct
|
|
2392
|
+
display_name: OLMo 2 13B Instruct November 2024
|
|
2393
|
+
description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
|
|
2394
|
+
creator_organization_name: Allen Institute for AI
|
|
2395
|
+
access: open
|
|
2396
|
+
num_parameters: 13700000000
|
|
2397
|
+
release_date: 2024-11-26
|
|
2398
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2399
|
+
|
|
2400
|
+
- name: allenai/olmo-2-0325-32b-instruct
|
|
2401
|
+
display_name: OLMo 2 32B Instruct March 2025
|
|
2402
|
+
description: OLMo 2 32B Instruct March 2025 is trained up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B))
|
|
2403
|
+
creator_organization_name: Allen Institute for AI
|
|
2404
|
+
access: open
|
|
2405
|
+
num_parameters: 32200000000
|
|
2406
|
+
release_date: 2025-03-13
|
|
2407
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2408
|
+
|
|
2409
|
+
- name: allenai/olmoe-1b-7b-0125-instruct
|
|
2410
|
+
display_name: OLMoE 1B-7B Instruct January 2025
|
|
2411
|
+
description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060))
|
|
2412
|
+
creator_organization_name: Allen Institute for AI
|
|
2413
|
+
access: open
|
|
2414
|
+
num_parameters: 32200000000
|
|
2415
|
+
release_date: 2025-03-13
|
|
2416
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2417
|
+
|
|
2153
2418
|
# Mistral AI
|
|
2154
2419
|
- name: mistralai/mistral-7b-v0.1
|
|
2155
2420
|
display_name: Mistral v0.1 (7B)
|
|
@@ -2300,6 +2565,14 @@ models:
|
|
|
2300
2565
|
release_date: 2023-12-11
|
|
2301
2566
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2302
2567
|
|
|
2568
|
+
- name: mistralai/mistral-medium-2505
|
|
2569
|
+
display_name: Mistral Medium 3 (2505)
|
|
2570
|
+
description: Mistral Medium 3 (2505) is a language model that is intended to to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
|
|
2571
|
+
creator_organization_name: Mistral AI
|
|
2572
|
+
access: limited
|
|
2573
|
+
release_date: 2025-05-07
|
|
2574
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2575
|
+
|
|
2303
2576
|
- name: mistralai/mistral-large-2402
|
|
2304
2577
|
display_name: Mistral Large (2402)
|
|
2305
2578
|
description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
@@ -2746,6 +3019,30 @@ models:
|
|
|
2746
3019
|
release_date: 2024-07-18
|
|
2747
3020
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2748
3021
|
|
|
3022
|
+
- name: openai/gpt-4.1-2025-04-14
|
|
3023
|
+
display_name: GPT-4.1 (2025-04-14)
|
|
3024
|
+
description: GPT-4.1 (2025-04-14) is a multimdodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
|
|
3025
|
+
creator_organization_name: OpenAI
|
|
3026
|
+
access: limited
|
|
3027
|
+
release_date: 2025-04-14
|
|
3028
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3029
|
+
|
|
3030
|
+
- name: openai/gpt-4.1-mini-2025-04-14
|
|
3031
|
+
display_name: GPT-4.1 mini (2025-04-14)
|
|
3032
|
+
description: GPT-4.1 mini (2025-04-14) is a multimdodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
|
|
3033
|
+
creator_organization_name: OpenAI
|
|
3034
|
+
access: limited
|
|
3035
|
+
release_date: 2025-04-14
|
|
3036
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3037
|
+
|
|
3038
|
+
- name: openai/gpt-4.1-nano-2025-04-14
|
|
3039
|
+
display_name: GPT-4.1 nano (2025-04-14)
|
|
3040
|
+
description: GPT-4.1 nano (2025-04-14) is a multimdodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
|
|
3041
|
+
creator_organization_name: OpenAI
|
|
3042
|
+
access: limited
|
|
3043
|
+
release_date: 2025-04-14
|
|
3044
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3045
|
+
|
|
2749
3046
|
- name: openai/whisper-1_gpt-4o-2024-11-20
|
|
2750
3047
|
display_name: Whisper-1 + GPT-4o (2024-11-20)
|
|
2751
3048
|
description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
|
|
@@ -2754,6 +3051,22 @@ models:
|
|
|
2754
3051
|
release_date: 2024-11-20
|
|
2755
3052
|
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
2756
3053
|
|
|
3054
|
+
- name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
|
|
3055
|
+
display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
|
|
3056
|
+
description: Transcribes the text with GPT-4o Transcribe and then uses GPT-4o to generate a response.
|
|
3057
|
+
creator_organization_name: OpenAI
|
|
3058
|
+
access: limited
|
|
3059
|
+
release_date: 2025-03-20
|
|
3060
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
3061
|
+
|
|
3062
|
+
- name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
|
|
3063
|
+
display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
|
|
3064
|
+
description: Transcribes the text with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
|
|
3065
|
+
creator_organization_name: OpenAI
|
|
3066
|
+
access: limited
|
|
3067
|
+
release_date: 2025-03-20
|
|
3068
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
3069
|
+
|
|
2757
3070
|
- name: openai/gpt-4o-audio-preview-2024-10-01
|
|
2758
3071
|
display_name: GPT-4o Audio (Preview 2024-10-01)
|
|
2759
3072
|
description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using use audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
|
|
@@ -2807,6 +3120,30 @@ models:
|
|
|
2807
3120
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2808
3121
|
|
|
2809
3122
|
## o1 Models
|
|
3123
|
+
- name: openai/o1-pro-2025-03-19
|
|
3124
|
+
display_name: o1 pro (2025-03-19)
|
|
3125
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
|
|
3126
|
+
creator_organization_name: OpenAI
|
|
3127
|
+
access: limited
|
|
3128
|
+
release_date: 2025-03-19
|
|
3129
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3130
|
+
|
|
3131
|
+
- name: openai/o1-pro-2025-03-19-low-reasoning-effort
|
|
3132
|
+
display_name: o1 pro (2025-03-19, low reasoning effort)
|
|
3133
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter in is set to low.
|
|
3134
|
+
creator_organization_name: OpenAI
|
|
3135
|
+
access: limited
|
|
3136
|
+
release_date: 2025-03-19
|
|
3137
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3138
|
+
|
|
3139
|
+
- name: openai/o1-pro-2025-03-19-high-reasoning-effort
|
|
3140
|
+
display_name: o1 pro (2025-03-19, high reasoning effort)
|
|
3141
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter in is set to high.
|
|
3142
|
+
creator_organization_name: OpenAI
|
|
3143
|
+
access: limited
|
|
3144
|
+
release_date: 2025-03-19
|
|
3145
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3146
|
+
|
|
2810
3147
|
- name: openai/o1-2024-12-17
|
|
2811
3148
|
display_name: o1 (2024-12-17)
|
|
2812
3149
|
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
|
|
@@ -2871,6 +3208,54 @@ models:
|
|
|
2871
3208
|
release_date: 2025-01-31
|
|
2872
3209
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2873
3210
|
|
|
3211
|
+
- name: openai/o3-2025-04-16
|
|
3212
|
+
display_name: o3 (2025-04-16)
|
|
3213
|
+
description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3214
|
+
creator_organization_name: OpenAI
|
|
3215
|
+
access: limited
|
|
3216
|
+
release_date: 2025-04-16
|
|
3217
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3218
|
+
|
|
3219
|
+
- name: openai/o3-2025-04-16-low-reasoning-effort
|
|
3220
|
+
display_name: o3 (2025-04-16, low reasoning effort)
|
|
3221
|
+
description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3222
|
+
creator_organization_name: OpenAI
|
|
3223
|
+
access: limited
|
|
3224
|
+
release_date: 2025-04-16
|
|
3225
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3226
|
+
|
|
3227
|
+
- name: openai/o3-2025-04-16-high-reasoning-effort
|
|
3228
|
+
display_name: o3 (2025-04-16, high reasoning effort)
|
|
3229
|
+
description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3230
|
+
creator_organization_name: OpenAI
|
|
3231
|
+
access: limited
|
|
3232
|
+
release_date: 2025-04-16
|
|
3233
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3234
|
+
|
|
3235
|
+
- name: openai/o4-mini-2025-04-16
|
|
3236
|
+
display_name: o4-mini (2025-04-16)
|
|
3237
|
+
description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3238
|
+
creator_organization_name: OpenAI
|
|
3239
|
+
access: limited
|
|
3240
|
+
release_date: 2025-04-16
|
|
3241
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3242
|
+
|
|
3243
|
+
- name: openai/o4-mini-2025-04-16-low-reasoning-effort
|
|
3244
|
+
display_name: o4-mini (2025-04-16, low reasoning effort)
|
|
3245
|
+
description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3246
|
+
creator_organization_name: OpenAI
|
|
3247
|
+
access: limited
|
|
3248
|
+
release_date: 2025-04-16
|
|
3249
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3250
|
+
|
|
3251
|
+
- name: openai/o4-mini-2025-04-16-high-reasoning-effort
|
|
3252
|
+
display_name: o4-mini (2025-04-16, high reasoning effort)
|
|
3253
|
+
description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
|
|
3254
|
+
creator_organization_name: OpenAI
|
|
3255
|
+
access: limited
|
|
3256
|
+
release_date: 2025-04-16
|
|
3257
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3258
|
+
|
|
2874
3259
|
## Codex Models
|
|
2875
3260
|
# DEPRECATED: Codex models have been shut down on March 23 2023.
|
|
2876
3261
|
|
|
@@ -3139,6 +3524,14 @@ models:
|
|
|
3139
3524
|
release_date: 2024-09-19
|
|
3140
3525
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3141
3526
|
|
|
3527
|
+
- name: qwen/qwen3-235b-a22b-fp8-tput
|
|
3528
|
+
display_name: Qwen3 235B A22B FP8 Throughput
|
|
3529
|
+
description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)).
|
|
3530
|
+
creator_organization_name: Qwen
|
|
3531
|
+
access: open
|
|
3532
|
+
release_date: 2025-04-29
|
|
3533
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3534
|
+
|
|
3142
3535
|
- name: qwen/qwq-32b-preview
|
|
3143
3536
|
display_name: QwQ (32B Preview)
|
|
3144
3537
|
description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
|
|
@@ -3180,6 +3573,38 @@ models:
|
|
|
3180
3573
|
release_date: 2024-08-29
|
|
3181
3574
|
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3182
3575
|
|
|
3576
|
+
- name: qwen/qwen2.5-vl-3b-instruct
|
|
3577
|
+
display_name: Qwen2.5-VL Instruct (3B)
|
|
3578
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3579
|
+
creator_organization_name: Alibaba Group
|
|
3580
|
+
access: open
|
|
3581
|
+
release_date: 2025-01-26
|
|
3582
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3583
|
+
|
|
3584
|
+
- name: qwen/qwen2.5-vl-7b-instruct
|
|
3585
|
+
display_name: Qwen2.5-VL Instruct (7B)
|
|
3586
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3587
|
+
creator_organization_name: Alibaba Group
|
|
3588
|
+
access: open
|
|
3589
|
+
release_date: 2025-01-26
|
|
3590
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3591
|
+
|
|
3592
|
+
- name: qwen/qwen2.5-vl-32b-instruct
|
|
3593
|
+
display_name: Qwen2.5-VL Instruct (32B)
|
|
3594
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3595
|
+
creator_organization_name: Alibaba Group
|
|
3596
|
+
access: open
|
|
3597
|
+
release_date: 2025-01-26
|
|
3598
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3599
|
+
|
|
3600
|
+
- name: qwen/qwen2.5-vl-72b-instruct
|
|
3601
|
+
display_name: Qwen2.5-VL Instruct (72B)
|
|
3602
|
+
description: The second generation of Qwen2.5-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
|
|
3603
|
+
creator_organization_name: Alibaba Group
|
|
3604
|
+
access: open
|
|
3605
|
+
release_date: 2025-01-26
|
|
3606
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3607
|
+
|
|
3183
3608
|
- name: qwen/qwen-audio-chat
|
|
3184
3609
|
display_name: Qwen-Audio Chat
|
|
3185
3610
|
description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
|
|
@@ -3196,6 +3621,14 @@ models:
|
|
|
3196
3621
|
release_date: 2024-07-15
|
|
3197
3622
|
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3198
3623
|
|
|
3624
|
+
- name: qwen/qwen2.5-omni-7b
|
|
3625
|
+
display_name: Qwen2.5-Omni (7B)
|
|
3626
|
+
description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)).
|
|
3627
|
+
creator_organization_name: Alibaba Cloud
|
|
3628
|
+
access: open
|
|
3629
|
+
release_date: 2025-03-27
|
|
3630
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3631
|
+
|
|
3199
3632
|
# SAIL (Sea AI Lab)
|
|
3200
3633
|
- name: sail/sailor-7b
|
|
3201
3634
|
display_name: Sailor (7B)
|
|
@@ -3670,6 +4103,14 @@ models:
|
|
|
3670
4103
|
release_date: 2024-09-12
|
|
3671
4104
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3672
4105
|
|
|
4106
|
+
- name: writer/palmyra-x5
|
|
4107
|
+
display_name: Palmyra X5
|
|
4108
|
+
description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
|
|
4109
|
+
creator_organization_name: Writer
|
|
4110
|
+
access: limited
|
|
4111
|
+
release_date: 2024-04-28
|
|
4112
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4113
|
+
|
|
3673
4114
|
- name: writer/palmyra-med-32k
|
|
3674
4115
|
display_name: Palmyra-Med 32K (70B)
|
|
3675
4116
|
description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
|
|
@@ -3680,11 +4121,10 @@ models:
|
|
|
3680
4121
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3681
4122
|
|
|
3682
4123
|
- name: writer/palmyra-med
|
|
3683
|
-
display_name: Palmyra
|
|
3684
|
-
description: Palmyra
|
|
4124
|
+
display_name: Palmyra Med
|
|
4125
|
+
description: Palmyra Med is a model intended for medical applications.
|
|
3685
4126
|
creator_organization_name: Writer
|
|
3686
4127
|
access: open
|
|
3687
|
-
num_parameters: 70600000000
|
|
3688
4128
|
release_date: 2024-07-31
|
|
3689
4129
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3690
4130
|
|
|
@@ -3707,12 +4147,20 @@ models:
|
|
|
3707
4147
|
|
|
3708
4148
|
# xAI
|
|
3709
4149
|
|
|
3710
|
-
- name: xai/grok-beta
|
|
3711
|
-
display_name: Grok Beta
|
|
3712
|
-
description: Grok Beta is a model
|
|
4150
|
+
- name: xai/grok-3-beta
|
|
4151
|
+
display_name: Grok 3 Beta
|
|
4152
|
+
description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
|
|
3713
4153
|
creator_organization_name: xAI
|
|
3714
|
-
access:
|
|
3715
|
-
release_date:
|
|
4154
|
+
access: limited
|
|
4155
|
+
release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
|
|
4156
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4157
|
+
|
|
4158
|
+
- name: xai/grok-3-mini-beta
|
|
4159
|
+
display_name: Grok 3 mini Beta
|
|
4160
|
+
description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
|
|
4161
|
+
creator_organization_name: xAI
|
|
4162
|
+
access: limited
|
|
4163
|
+
release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
|
|
3716
4164
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3717
4165
|
|
|
3718
4166
|
# Yandex
|
|
@@ -3807,6 +4255,17 @@ models:
|
|
|
3807
4255
|
release_date: 2024-09-10
|
|
3808
4256
|
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3809
4257
|
|
|
4258
|
+
|
|
4259
|
+
# Maritaca AI
|
|
4260
|
+
- name: maritaca-ai/sabia-7b
|
|
4261
|
+
display_name: Sabia 7B
|
|
4262
|
+
description: Sabia 7B
|
|
4263
|
+
creator_organization_name: MARITACA-AI
|
|
4264
|
+
access: open
|
|
4265
|
+
num_parameters: 6740000000
|
|
4266
|
+
release_date: 2023-11-08
|
|
4267
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4268
|
+
|
|
3810
4269
|
# Granite - IBM
|
|
3811
4270
|
# https://www.ibm.com/granite
|
|
3812
4271
|
# https://github.com/ibm-granite/granite-3.0-language-models
|
|
@@ -3882,17 +4341,7 @@ models:
|
|
|
3882
4341
|
num_parameters: 1380000000
|
|
3883
4342
|
release: 2024-10-21
|
|
3884
4343
|
tags: [TEXT_MODEL_TAG]
|
|
3885
|
-
|
|
3886
|
-
- name: maritaca-ai/sabia-7b
|
|
3887
|
-
display_name: Sabia 7B
|
|
3888
|
-
description: Sabia 7B
|
|
3889
|
-
creator_organization_name: MARITACA-AI
|
|
3890
|
-
access: open
|
|
3891
|
-
num_parameters: 6740000000
|
|
3892
|
-
release_date: 2023-11-08
|
|
3893
|
-
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3894
4344
|
|
|
3895
|
-
# Granite-3.1-8b-base
|
|
3896
4345
|
- name: ibm-granite/granite-3.1-8b-base
|
|
3897
4346
|
display_name: Granite 3.1 - 8B - Base
|
|
3898
4347
|
description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
|
|
@@ -3902,7 +4351,6 @@ models:
|
|
|
3902
4351
|
release_date: 2024-12-18
|
|
3903
4352
|
tags: [TEXT_MODEL_TAG]
|
|
3904
4353
|
|
|
3905
|
-
# Granite-3.1-8b-instruct
|
|
3906
4354
|
- name: ibm-granite/granite-3.1-8b-instruct
|
|
3907
4355
|
display_name: Granite 3.1 - 8B - Instruct
|
|
3908
4356
|
description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
@@ -3912,7 +4360,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-# Granite-3.1-2b-instruct
   - name: ibm-granite/granite-3.1-2b-instruct
     display_name: Granite 3.1 - 2B - Instruct
     description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3922,7 +4369,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-# Granite-3.1-2b-base
   - name: ibm-granite/granite-3.1-2b-base
     display_name: Granite 3.1 - 2B - Base
     description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
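The Granite 3.1 base descriptions all reference the same mechanism: context is extended from 4K to 128K in stages while RoPE theta grows. The intuition is that rotary position embeddings rotate each pair of head dimensions at frequency theta^(-2i/d), so a larger theta stretches the slowest rotation to cover longer sequences. A minimal sketch of that relationship (illustrative values only, not Granite's training code):

```python
# Minimal sketch: how raising RoPE theta stretches positional wavelengths.
# Illustrative only; not Granite's actual training code.
import math

def longest_wavelength(theta: float, dim: int = 128) -> float:
    # Pair i of a dim-sized head rotates at frequency theta ** (-2 * i / dim);
    # the slowest pair (i = dim/2 - 1) sets the longest distinguishable span.
    slowest_freq = theta ** (-2 * (dim // 2 - 1) / dim)
    return 2 * math.pi / slowest_freq

for theta in (10_000, 100_000, 1_000_000, 10_000_000):
    print(f"theta={theta:>10,}: ~{longest_wavelength(theta):,.0f} positions")
```

Growing theta in increments, with adaptation at each step, is the "progressive training strategy" these descriptions mention.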
@@ -3932,7 +4378,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-# Granite-3.1-3b-a800m-instruct
   - name: ibm-granite/granite-3.1-3b-a800m-instruct
     display_name: Granite 3.1 - 3B - A800M - Instruct
     description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3942,7 +4387,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-# Granite-3.1-3b-a800m-base
   - name: ibm-granite/granite-3.1-3b-a800m-base
     display_name: Granite 3.1 - 3B - A800M - Base
     description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3952,7 +4396,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-# Granite-3.1-1b-a400m-instruct
   - name: ibm-granite/granite-3.1-1b-a400m-instruct
     display_name: Granite 3.1 - 1B - A400M - Instruct
     description: Granite-3.1-1B-A400M-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -3962,7 +4405,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-# Granite-3.1-1b-a400m-base
   - name: ibm-granite/granite-3.1-1b-a400m-base
     display_name: Granite 3.1 - 1B - A400M - Base
     description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
@@ -3972,27 +4414,6 @@ models:
     release_date: 2024-12-18
     tags: [TEXT_MODEL_TAG]
 
-# DeepSeek-R1-Distill-Llama-3.1-8b
-  - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
-    display_name: DeepSeek-R1-Distill-Llama-8b
-    description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 8000000000
-    release_date: 2025-01-20
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-# deepseek-ai/deepseek-coder-6.7b-instruct
-  - name: deepseek-ai/deepseek-coder-6.7b-instruct
-    display_name: DeepSeek-Coder-6.7b-Instruct
-    description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
-    creator_organization_name: DeepSeek
-    access: open
-    num_parameters: 6740000000
-    release_date: 2025-01-20
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-# WatsonX - IBM
   - name: ibm/granite-13b-instruct-v2
     display_name: Granite 13b instruct v2
     description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model. The following features were used in the design of the model Decoder-only model
@@ -4039,11 +4460,6 @@ models:
     release: 2024-6-18
     tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
 
-
-
-
-
-
   - name: ibm/granite-3.1-8b-instruct
     display_name: Granite 3.1 - 8B - Instruct
     description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
@@ -4062,6 +4478,15 @@ models:
     release_date: 2024-12-18
     tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
 
+  - name: ibm/granite-3.3-8b-instruct
+    display_name: Granite 3.3 8B Instruct
+    description: Granite 3.3 8B Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
+    creator_organization_name: IBM
+    access: open
+    num_parameters: 8170000000
+    release_date: 2025-04-16
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mixtral-8x7b-instruct-v0:1
     display_name: Mixtral 8x7B Instruct on IBM WatsonX
     description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
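The Mixtral description illustrates the sparse Mixture-of-Experts bookkeeping behind parameter counts like these: only the router-selected experts run per token, so active parameters are far fewer than the total. A rough worked sketch, assuming top-2 routing over 8 experts (the routing scheme is from the Mixtral paper; the split between shared and expert parameters below is an assumed round number, not an official figure):

```python
# Rough sketch of sparse-MoE parameter accounting, assuming 8 experts with
# top-2 routing. The shared/expert split is assumed, not an official figure.
TOTAL_EXPERTS = 8
ACTIVE_EXPERTS = 2            # router picks 2 experts per token
shared_params = 1.3e9         # attention/embeddings run for every token (assumed)
expert_params_total = 45e9 - shared_params  # the 8 expert FFN stacks combined

active = shared_params + expert_params_total * ACTIVE_EXPERTS / TOTAL_EXPERTS
print(f"~{active / 1e9:.0f}B active of 45B total")  # ~12B, matching the entry
```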
@@ -4069,3 +4494,192 @@ models:
     access: limited
     release_date: 2023-12-11
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-2.1-8b
+    display_name: URA-Llama 2.1 (8B)
+    description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-08-04
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-2-8b
+    display_name: URA-Llama 2 (8B)
+    description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-08-04
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-7b
+    display_name: URA-Llama 7B (7B)
+    description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-10-10
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-13b
+    display_name: URA-Llama 13B (13B)
+    description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 13000000000
+    release_date: 2023-10-10
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/ura-llama-70b
+    display_name: URA-Llama 70B (70B)
+    description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 70000000000
+    release_date: 2023-10-10
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/GemSUra-7B
+    display_name: GemSUra 7B
+    description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/GemSUra-2B
+    display_name: GemSUra 2B
+    description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 2000000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: ura-hcmut/MixSUra
+    display_name: MixSUra
+    description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a mixture of experts model with 8 active experts.
+    creator_organization_name: URA
+    access: open
+    num_parameters: 46700000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vinallama-7b-chat
+    display_name: VinaLLaMa
+    description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vinallama-2.7b-chat
+    display_name: VinaLLaMa 2.7B
+    description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 2700000000
+    release_date: 2024-03-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vietcuna-7b-v3
+    display_name: VietCuna 7B (v3)
+    description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-08-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/vietcuna-3b-v2
+    display_name: VietCuna 3B (v2)
+    description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 3000000000
+    release_date: 2023-07-15
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-v0.1
+    display_name: Quyen (v0.1)
+    description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 4000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Plus-v0.1
+    display_name: Quyen Plus (v0.1)
+    description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Pro-v0.1
+    display_name: Quyen Pro (v0.1)
+    description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Pro-Max-v0.1
+    display_name: Quyen Pro Max (v0.1)
+    description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-Mini-v0.1
+    display_name: Quyen Mini (v0.1)
+    description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 1800000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vilm/Quyen-SE-v0.1
+    display_name: Quyen SE (v0.1)
+    description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: ViLM
+    access: open
+    num_parameters: 500000000
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: Viet-Mistral/Vistral-7B-Chat
+    display_name: Vistral 7B Chat
+    description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: Viet-Mistral
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vinai/PhoGPT-7B5-Instruct
+    display_name: PhoGPT 7B5 Instruct
+    description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: VinAI
+    access: open
+    num_parameters: 7500000000
+    release_date: 2024-02-19
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: vinai/PhoGPT-4B-Chat
+    display_name: PhoGPT 4B Chat
+    description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+    creator_organization_name: VinAI
+    access: open
+    num_parameters: 4000000000
+    release_date: 2024-04-02
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
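This final hunk adds twenty-one Vietnamese-language models (URA, ViLM, Viet-Mistral, VinAI) in one block. Since `num_parameters` is stored as a raw integer, any display layer has to derive the human-readable sizes seen in the display names; a minimal sketch of that conversion (a hypothetical helper, not crfm-helm code):

```python
# Minimal sketch: render raw num_parameters values as the human-readable
# sizes used in display names. Hypothetical helper, not crfm-helm code.
def humanize_params(n: int) -> str:
    if n >= 1_000_000_000:
        value, suffix = n / 1_000_000_000, "B"
    else:
        value, suffix = n / 1_000_000, "M"
    # Trim a trailing .0 so 7000000000 -> "7B" but 2700000000 -> "2.7B"
    text = f"{value:.1f}".rstrip("0").rstrip(".")
    return f"{text}{suffix}"

for n in (500_000_000, 2_700_000_000, 7_500_000_000, 46_700_000_000):
    print(humanize_params(n))  # 500M, 2.7B, 7.5B, 46.7B
```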