crfm-helm 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries; it is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic.
- crfm_helm-0.5.6.dist-info/METADATA +427 -0
- crfm_helm-0.5.6.dist-info/RECORD +941 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +13 -1
- helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
- helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +4 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/common_adapter_specs.py +69 -4
- helm/benchmark/adaptation/prompt.py +1 -1
- helm/benchmark/annotation/aci_bench_annotator.py +95 -0
- helm/benchmark/annotation/air_bench_annotator.py +21 -6
- helm/benchmark/annotation/annotator.py +5 -0
- helm/benchmark/annotation/annotator_factory.py +3 -20
- helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
- helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
- helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
- helm/benchmark/annotation/bird_sql_annotator.py +58 -0
- helm/benchmark/annotation/chw_care_plan_annotator.py +93 -0
- helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
- helm/benchmark/annotation/dischargeme_annotator.py +107 -0
- helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
- helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
- helm/benchmark/annotation/live_qa_annotator.py +1 -1
- helm/benchmark/annotation/med_dialog_annotator.py +99 -0
- helm/benchmark/annotation/medalign_annotator.py +100 -0
- helm/benchmark/annotation/medi_qa_annotator.py +98 -0
- helm/benchmark/annotation/medication_qa_annotator.py +87 -63
- helm/benchmark/annotation/mental_health_annotator.py +98 -0
- helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
- helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +214 -6
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
- helm/benchmark/annotation/omni_math_annotator.py +131 -0
- helm/benchmark/annotation/spider_annotator.py +18 -0
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
- helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
- helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
- helm/benchmark/annotation/wildbench_annotator.py +119 -0
- helm/benchmark/annotation_executor.py +35 -15
- helm/benchmark/augmentations/cleva_perturbation.py +9 -8
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
- helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
- helm/benchmark/augmentations/dialect_perturbation.py +4 -5
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +2 -2
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +4 -5
- helm/benchmark/augmentations/perturbation.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +2 -2
- helm/benchmark/augmentations/synonym_perturbation.py +4 -3
- helm/benchmark/augmentations/test_perturbation.py +16 -13
- helm/benchmark/augmentations/translate_perturbation.py +2 -2
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/data_preprocessor.py +2 -2
- helm/benchmark/executor.py +11 -12
- helm/benchmark/huggingface_registration.py +2 -7
- helm/benchmark/metrics/aci_bench_metrics.py +14 -0
- helm/benchmark/metrics/basic_metrics.py +6 -6
- helm/benchmark/metrics/bbq_metrics.py +2 -2
- helm/benchmark/metrics/bias_metrics.py +12 -3
- helm/benchmark/metrics/bias_word_lists.py +1 -1
- helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
- helm/benchmark/metrics/bird_sql_metrics.py +28 -0
- helm/benchmark/metrics/chw_care_plan_metrics.py +14 -0
- helm/benchmark/metrics/classification_metrics.py +76 -12
- helm/benchmark/metrics/cleva_harms_metrics.py +10 -9
- helm/benchmark/metrics/code_metrics.py +5 -5
- helm/benchmark/metrics/comet_metric.py +125 -0
- helm/benchmark/metrics/common_metric_specs.py +9 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
- helm/benchmark/metrics/copyright_metrics.py +4 -4
- helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +14 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -4
- helm/benchmark/metrics/dry_run_metrics.py +5 -5
- helm/benchmark/metrics/efficiency_metrics.py +6 -6
- helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
- helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
- helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
- helm/benchmark/metrics/ifeval/__init__.py +0 -0
- helm/benchmark/metrics/ifeval/instructions.py +1574 -0
- helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
- helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
- helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
- helm/benchmark/metrics/ifeval_metrics.py +55 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
- helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
- helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
- helm/benchmark/metrics/language_modeling_metrics.py +4 -4
- helm/benchmark/metrics/llm_jury_metrics.py +46 -0
- helm/benchmark/metrics/machine_translation_metrics.py +2 -2
- helm/benchmark/metrics/med_dialog_metrics.py +14 -0
- helm/benchmark/metrics/medalign_metrics.py +14 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
- helm/benchmark/metrics/medec_metrics.py +101 -0
- helm/benchmark/metrics/medi_qa_metrics.py +14 -0
- helm/benchmark/metrics/medication_qa_metrics.py +10 -19
- helm/benchmark/metrics/melt_bias_metric.py +234 -0
- helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
- helm/benchmark/metrics/melt_metric_specs.py +43 -0
- helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
- helm/benchmark/metrics/mental_health_metrics.py +14 -0
- helm/benchmark/metrics/metric.py +3 -3
- helm/benchmark/metrics/metric_service.py +11 -11
- helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
- helm/benchmark/metrics/mimic_rrs_metrics.py +14 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +14 -0
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +14 -0
- helm/benchmark/metrics/nltk_helper.py +32 -0
- helm/benchmark/metrics/numeracy_metrics.py +4 -4
- helm/benchmark/metrics/omni_math_metrics.py +32 -0
- helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
- helm/benchmark/metrics/output_processing_metric.py +60 -0
- helm/benchmark/metrics/output_processors.py +15 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
- helm/benchmark/metrics/ranking_metrics.py +3 -3
- helm/benchmark/metrics/reference_metric.py +3 -3
- helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
- helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
- helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
- helm/benchmark/metrics/spider_metrics.py +7 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +14 -0
- helm/benchmark/metrics/statistic.py +1 -1
- helm/benchmark/metrics/summac/model_summac.py +2 -3
- helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
- helm/benchmark/metrics/summarization_metrics.py +20 -9
- helm/benchmark/metrics/test_bias_metrics.py +5 -1
- helm/benchmark/metrics/test_classification_metrics.py +140 -68
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
- helm/benchmark/metrics/test_metric.py +1 -1
- helm/benchmark/metrics/test_statistic.py +2 -2
- helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
- helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +3 -3
- helm/benchmark/metrics/toxicity_metrics.py +6 -6
- helm/benchmark/metrics/unitxt_metrics.py +7 -5
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
- helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
- helm/benchmark/metrics/vision_language/image_utils.py +2 -2
- helm/benchmark/metrics/wildbench_metrics.py +34 -0
- helm/benchmark/model_deployment_registry.py +6 -8
- helm/benchmark/model_metadata_registry.py +16 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +33 -12
- helm/benchmark/presentation/run_display.py +13 -0
- helm/benchmark/presentation/schema.py +2 -1
- helm/benchmark/presentation/summarize.py +97 -67
- helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
- helm/benchmark/reeval_run.py +202 -0
- helm/benchmark/reeval_runner.py +355 -0
- helm/benchmark/run.py +86 -90
- helm/benchmark/run_expander.py +90 -9
- helm/benchmark/run_spec_factory.py +13 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
- helm/benchmark/run_specs/audio_run_specs.py +657 -0
- helm/benchmark/run_specs/call_center_run_specs.py +49 -0
- helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
- helm/benchmark/run_specs/classic_run_specs.py +1 -69
- helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +280 -0
- helm/benchmark/run_specs/experimental_run_specs.py +142 -3
- helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
- helm/benchmark/run_specs/lite_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +141 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +1260 -0
- helm/benchmark/run_specs/melt_run_specs.py +783 -0
- helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
- helm/benchmark/run_specs/oab_exams_specs.py +32 -0
- helm/benchmark/run_specs/safety_run_specs.py +37 -0
- helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
- helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +169 -0
- helm/benchmark/run_specs/sql_run_specs.py +54 -0
- helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
- helm/benchmark/run_specs/vlm_run_specs.py +103 -2
- helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
- helm/benchmark/runner.py +5 -5
- helm/benchmark/scenarios/aci_bench_scenario.py +126 -0
- helm/benchmark/scenarios/air_bench_scenario.py +6 -1
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/__init__.py +0 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +130 -0
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
- helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
- helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
- helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
- helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
- helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
- helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
- helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
- helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
- helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +103 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +110 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +78 -0
- helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +109 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +105 -0
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
- helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
- helm/benchmark/scenarios/banking77_scenario.py +6 -1
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/big_bench_scenario.py +11 -1
- helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
- helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
- helm/benchmark/scenarios/blimp_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +1 -1
- helm/benchmark/scenarios/boolq_scenario.py +1 -1
- helm/benchmark/scenarios/casehold_scenario.py +79 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +106 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
- helm/benchmark/scenarios/clear_scenario.py +157 -0
- helm/benchmark/scenarios/cleva_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +17 -4
- helm/benchmark/scenarios/commonsense_scenario.py +1 -1
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
- helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
- helm/benchmark/scenarios/dischargeme_scenario.py +172 -0
- helm/benchmark/scenarios/disinformation_scenario.py +10 -1
- helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
- helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
- helm/benchmark/scenarios/ehr_sql_scenario.py +137 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +1519 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
- helm/benchmark/scenarios/gpqa_scenario.py +80 -0
- helm/benchmark/scenarios/grammar.py +2 -2
- helm/benchmark/scenarios/grammar_scenario.py +2 -2
- helm/benchmark/scenarios/gsm_scenario.py +10 -1
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
- helm/benchmark/scenarios/headqa_scenario.py +136 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
- helm/benchmark/scenarios/ice_scenario.py +8 -4
- helm/benchmark/scenarios/ifeval_scenario.py +53 -0
- helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
- helm/benchmark/scenarios/imdb_scenario.py +11 -2
- helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
- helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +79 -0
- helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
- helm/benchmark/scenarios/koala_scenario.py +1 -1
- helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
- helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
- helm/benchmark/scenarios/legal_support_scenario.py +11 -1
- helm/benchmark/scenarios/legalbench_scenario.py +22 -3
- helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
- helm/benchmark/scenarios/lextreme_scenario.py +11 -1
- helm/benchmark/scenarios/live_qa_scenario.py +1 -1
- helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
- helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
- helm/benchmark/scenarios/math_scenario.py +9 -1
- helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
- helm/benchmark/scenarios/med_dialog_scenario.py +25 -22
- helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
- helm/benchmark/scenarios/med_qa_scenario.py +10 -1
- helm/benchmark/scenarios/medalign_scenario.py +94 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +432 -0
- helm/benchmark/scenarios/medbullets_scenario.py +145 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +127 -0
- helm/benchmark/scenarios/medec_scenario.py +125 -0
- helm/benchmark/scenarios/medhallu_scenario.py +72 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +111 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +8 -2
- helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
- helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
- helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
- helm/benchmark/scenarios/melt_scenarios.py +793 -0
- helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
- helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
- helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
- helm/benchmark/scenarios/mental_health_scenario.py +123 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +103 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +98 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +77 -0
- helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
- helm/benchmark/scenarios/mmlu_scenario.py +11 -1
- helm/benchmark/scenarios/msmarco_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +144 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +142 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +277 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
- helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
- helm/benchmark/scenarios/newsqa_scenario.py +1 -1
- helm/benchmark/scenarios/numeracy_scenario.py +12 -2
- helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
- helm/benchmark/scenarios/omni_math_scenario.py +53 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
- helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
- helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
- helm/benchmark/scenarios/pubmed_qa_scenario.py +59 -43
- helm/benchmark/scenarios/quac_scenario.py +10 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +152 -0
- helm/benchmark/scenarios/raft_scenario.py +17 -2
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
- helm/benchmark/scenarios/scenario.py +9 -1
- helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
- helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +75 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +75 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +77 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +74 -0
- helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
- helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +81 -0
- helm/benchmark/scenarios/shc_sei_scenario.py +94 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +77 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
- helm/benchmark/scenarios/spider_scenario.py +91 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +97 -0
- helm/benchmark/scenarios/summarization_scenario.py +11 -1
- helm/benchmark/scenarios/sumosum_scenario.py +157 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
- helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
- helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
- helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
- helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
- helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
- helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
- helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
- helm/benchmark/scenarios/test_math_scenario.py +1 -0
- helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
- helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
- helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
- helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
- helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
- helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
- helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +12 -2
- helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
- helm/benchmark/scenarios/unitxt_scenario.py +8 -2
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
- helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
- helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
- helm/benchmark/scenarios/wikifact_scenario.py +11 -1
- helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
- helm/benchmark/scenarios/wildbench_scenario.py +83 -0
- helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
- helm/benchmark/scenarios/xstest_scenario.py +1 -1
- helm/benchmark/server.py +13 -1
- helm/benchmark/slurm_runner.py +1 -1
- helm/benchmark/static/schema_audio.yaml +763 -0
- helm/benchmark/static/schema_autobencher.yaml +150 -0
- helm/benchmark/static/schema_call_center.yaml +97 -60
- helm/benchmark/static/{schema_medical.yaml → schema_capabilities.yaml} +100 -101
- helm/benchmark/static/schema_czech_bank.yaml +148 -0
- helm/benchmark/static/schema_enem_challenge.yaml +146 -0
- helm/benchmark/static/schema_enterprise.yaml +319 -0
- helm/benchmark/static/schema_finance.yaml +14 -12
- helm/benchmark/static/schema_heim.yaml +1389 -0
- helm/benchmark/static/schema_long_context.yaml +283 -0
- helm/benchmark/static/schema_medhelm.yaml +1140 -0
- helm/benchmark/static/schema_melt.yaml +1257 -0
- helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
- helm/benchmark/static/schema_safety.yaml +18 -1
- helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
- helm/benchmark/static/schema_slphelm.yaml +162 -0
- helm/benchmark/static/schema_social_audio.yaml +224 -0
- helm/benchmark/static/schema_sql.yaml +171 -0
- helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
- helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
- helm/benchmark/static/schema_vhelm.yaml +129 -56
- helm/benchmark/static/schema_video.yaml +219 -0
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
- helm/benchmark/static_build/assets/index-94295e78.js +10 -0
- helm/benchmark/static_build/assets/index-b9779128.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
- helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png +0 -0
- helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
- helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
- helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-38a10867.js} +2 -2
- helm/benchmark/static_build/config.js +1 -1
- helm/benchmark/static_build/index.html +6 -6
- helm/benchmark/window_services/default_window_service.py +1 -1
- helm/benchmark/window_services/encoder_decoder_window_service.py +4 -4
- helm/benchmark/window_services/ice_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
- helm/benchmark/window_services/local_window_service.py +2 -2
- helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
- helm/benchmark/window_services/test_bloom_window_service.py +3 -3
- helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
- helm/benchmark/window_services/test_gptj_window_service.py +8 -3
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
- helm/benchmark/window_services/test_openai_window_service.py +8 -3
- helm/benchmark/window_services/test_opt_window_service.py +3 -3
- helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
- helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
- helm/benchmark/window_services/test_t511b_window_service.py +3 -3
- helm/benchmark/window_services/test_ul2_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +4 -5
- helm/benchmark/window_services/test_yalm_window_service.py +3 -3
- helm/benchmark/window_services/tokenizer_service.py +7 -8
- helm/benchmark/window_services/yalm_window_service.py +1 -1
- helm/clients/ai21_client.py +3 -3
- helm/clients/aleph_alpha_client.py +1 -1
- helm/clients/anthropic_client.py +69 -29
- helm/clients/audio_language/__init__.py +0 -0
- helm/clients/audio_language/diva_llama_client.py +120 -0
- helm/clients/audio_language/llama_omni_client.py +198 -0
- helm/clients/audio_language/qwen2_5_omni_client.py +197 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +190 -0
- helm/clients/audio_language/qwen_audiolm_client.py +152 -0
- helm/clients/audio_language/test.py +62 -0
- helm/clients/auto_client.py +4 -2
- helm/clients/azure_openai_client.py +55 -0
- helm/clients/bedrock_client.py +203 -7
- helm/clients/bedrock_utils.py +33 -0
- helm/clients/client.py +7 -7
- helm/clients/clip_scorers/clip_scorer.py +1 -1
- helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
- helm/clients/cohere_client.py +3 -3
- helm/clients/google_client.py +1 -1
- helm/clients/grok_client.py +36 -0
- helm/clients/http_model_client.py +1 -1
- helm/clients/huggingface_client.py +52 -21
- helm/clients/huggingface_pipeline_client.py +138 -0
- helm/clients/ibm_client.py +267 -0
- helm/clients/image_generation/adobe_vision_client.py +1 -1
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
- helm/clients/image_generation/cogview2_client.py +1 -1
- helm/clients/image_generation/dalle2_client.py +1 -1
- helm/clients/image_generation/dalle3_client.py +2 -2
- helm/clients/image_generation/dalle_mini/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/data.py +1 -1
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
- helm/clients/image_generation/dalle_mini/model/configuration.py +2 -2
- helm/clients/image_generation/dalle_mini/model/modeling.py +3 -3
- helm/clients/image_generation/dalle_mini/model/processor.py +5 -5
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
- helm/clients/image_generation/dalle_mini_client.py +1 -1
- helm/clients/image_generation/deep_floyd_client.py +1 -1
- helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
- helm/clients/image_generation/lexica_client.py +1 -1
- helm/clients/image_generation/mindalle/models/__init__.py +6 -6
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
- helm/clients/image_generation/mindalle_client.py +1 -1
- helm/clients/image_generation/together_image_generation_client.py +1 -1
- helm/clients/lit_gpt_client.py +2 -2
- helm/clients/mistral_client.py +62 -18
- helm/clients/nvidia_nim_client.py +0 -3
- helm/clients/openai_client.py +308 -43
- helm/clients/openai_responses_client.py +174 -0
- helm/clients/palmyra_client.py +3 -9
- helm/clients/reka_client.py +3 -3
- helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
- helm/clients/stanfordhealthcare_claude_client.py +31 -0
- helm/clients/stanfordhealthcare_google_client.py +43 -0
- helm/clients/stanfordhealthcare_http_model_client.py +93 -0
- helm/clients/stanfordhealthcare_openai_client.py +62 -0
- helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
- helm/clients/test_client.py +1 -1
- helm/clients/test_together_client.py +6 -1
- helm/clients/together_client.py +76 -9
- helm/clients/upstage_client.py +23 -0
- helm/clients/vertexai_client.py +45 -13
- helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
- helm/clients/vision_language/huggingface_vlm_client.py +2 -2
- helm/clients/vision_language/idefics_client.py +6 -2
- helm/clients/vision_language/open_flamingo/__init__.py +2 -2
- helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
- helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
- helm/clients/vision_language/paligemma_client.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +188 -0
- helm/clients/vision_language/qwen_vlm_client.py +7 -5
- helm/clients/vllm_client.py +4 -6
- helm/clients/writer_client.py +102 -0
- helm/clients/yi_client.py +0 -3
- helm/common/audio_utils.py +111 -0
- helm/common/context.py +80 -0
- helm/common/credentials_utils.py +5 -5
- helm/common/file_caches/local_file_cache.py +1 -1
- helm/common/file_caches/test_local_file_cache.py +1 -1
- helm/common/general.py +9 -2
- helm/common/hierarchical_logger.py +46 -3
- helm/common/images_utils.py +2 -2
- helm/common/local_context.py +140 -0
- helm/common/media_object.py +2 -2
- helm/common/multimodal_request_utils.py +26 -0
- helm/common/reeval_parameters.py +12 -0
- helm/common/remote_context.py +61 -0
- helm/common/request.py +14 -2
- helm/common/response_format.py +18 -0
- helm/common/test_media_object.py +1 -1
- helm/config/model_deployments.yaml +1792 -28
- helm/config/model_metadata.yaml +1606 -51
- helm/config/tokenizer_configs.yaml +521 -4
- helm/proxy/cli.py +5 -3
- helm/proxy/critique/mechanical_turk_utils.py +1 -1
- helm/proxy/example_queries.py +1 -1
- helm/proxy/server.py +11 -4
- helm/proxy/services/remote_service.py +1 -1
- helm/proxy/services/server_service.py +22 -86
- helm/proxy/services/test_remote_service.py +2 -2
- helm/proxy/services/test_service.py +1 -1
- helm/proxy/static/general.js +122 -0
- helm/proxy/static/help.html +99 -0
- helm/proxy/static/index.css +57 -0
- helm/proxy/static/index.html +40 -0
- helm/proxy/static/index.js +456 -0
- helm/proxy/static/info-icon.png +0 -0
- helm/proxy/test_retry.py +1 -1
- helm/proxy/token_counters/auto_token_counter.py +1 -1
- helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
- helm/tokenizers/caching_tokenizer.py +2 -30
- helm/tokenizers/grok_tokenizer.py +53 -0
- helm/tokenizers/http_model_tokenizer.py +1 -1
- helm/tokenizers/huggingface_tokenizer.py +3 -3
- helm/tokenizers/lit_gpt_tokenizer.py +1 -1
- helm/tokenizers/test_anthropic_tokenizer.py +6 -2
- helm/tokenizers/test_grok_tokenizer.py +33 -0
- helm/tokenizers/test_huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_yalm_tokenizer.py +1 -1
- helm/tokenizers/tiktoken_tokenizer.py +1 -1
- helm/tokenizers/tokenizer.py +3 -1
- helm/tokenizers/yalm_tokenizer.py +3 -3
- helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
- crfm_helm-0.5.4.dist-info/METADATA +0 -350
- crfm_helm-0.5.4.dist-info/RECORD +0 -697
- helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
- helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/tokenizers/anthropic_tokenizer.py +0 -52
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info/licenses}/LICENSE +0 -0
- {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/top_level.txt +0 -0
helm/config/model_metadata.yaml
CHANGED
|
@@ -18,7 +18,7 @@ models:
|
|
|
18
18
|
access: open
|
|
19
19
|
release_date: 2023-01-01
|
|
20
20
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
# Adobe
|
|
23
23
|
- name: adobe/giga-gan
|
|
24
24
|
display_name: GigaGAN (1B)
|
|
@@ -128,7 +128,7 @@ models:
|
|
|
128
128
|
|
|
129
129
|
# AI Singapore
|
|
130
130
|
- name: aisingapore/sea-lion-7b
|
|
131
|
-
display_name: SEA-LION
|
|
131
|
+
display_name: SEA-LION 7B
|
|
132
132
|
description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
|
|
133
133
|
creator_organization_name: AI Singapore
|
|
134
134
|
access: open
|
|
@@ -137,7 +137,7 @@ models:
|
|
|
137
137
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
138
138
|
|
|
139
139
|
- name: aisingapore/sea-lion-7b-instruct
|
|
140
|
-
display_name: SEA-LION Instruct
|
|
140
|
+
display_name: SEA-LION 7B Instruct
|
|
141
141
|
description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
|
|
142
142
|
creator_organization_name: AI Singapore
|
|
143
143
|
access: open
|
|
@@ -146,23 +146,77 @@ models:
|
|
|
146
146
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
147
147
|
|
|
148
148
|
- name: aisingapore/llama3-8b-cpt-sea-lionv2-base
|
|
149
|
-
display_name:
|
|
150
|
-
description:
|
|
149
|
+
display_name: Llama3 8B CPT SEA-LIONv2
|
|
150
|
+
description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
|
|
151
151
|
creator_organization_name: AI Singapore
|
|
152
152
|
access: open
|
|
153
|
-
num_parameters:
|
|
153
|
+
num_parameters: 8030000000
|
|
154
154
|
release_date: 2024-07-31
|
|
155
|
-
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG
|
|
155
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
156
156
|
|
|
157
157
|
- name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
|
|
158
|
-
display_name:
|
|
159
|
-
description:
|
|
158
|
+
display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct
|
|
159
|
+
description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
|
|
160
160
|
creator_organization_name: AI Singapore
|
|
161
161
|
access: open
|
|
162
|
-
num_parameters:
|
|
162
|
+
num_parameters: 8030000000
|
|
163
163
|
release_date: 2024-08-21
|
|
164
164
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
165
165
|
|
|
166
|
+
- name: aisingapore/gemma2-9b-cpt-sea-lionv3-base
|
|
167
|
+
display_name: Gemma2 9B CPT SEA-LIONv3
|
|
168
|
+
description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese.
|
|
169
|
+
creator_organization_name: AI Singapore
|
|
170
|
+
access: open
|
|
171
|
+
num_parameters: 9240000000
|
|
172
|
+
release_date: 2024-10-30
|
|
173
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
174
|
+
|
|
175
|
+
- name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct
|
|
176
|
+
display_name: Gemma2 9B CPT SEA-LIONv3 Instruct
|
|
177
|
+
description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese.
|
|
178
|
+
creator_organization_name: AI Singapore
|
|
179
|
+
access: open
|
|
180
|
+
num_parameters: 9240000000
|
|
181
|
+
release_date: 2024-10-30
|
|
182
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
183
|
+
|
|
184
|
+
- name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
|
|
185
|
+
display_name: Llama3.1 8B CPT SEA-LIONv3
|
|
186
|
+
description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesia, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
|
|
187
|
+
creator_organization_name: AI Singapore
|
|
188
|
+
access: open
|
|
189
|
+
num_parameters: 9240000000
|
|
190
|
+
release_date: 2024-12-11
|
|
191
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
192
|
+
|
|
193
|
+
- name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
|
|
194
|
+
display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct
|
|
195
|
+
description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese.
|
|
196
|
+
creator_organization_name: AI Singapore
|
|
197
|
+
access: open
|
|
198
|
+
num_parameters: 9240000000
|
|
199
|
+
release_date: 2024-12-11
|
|
200
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
201
|
+
|
|
202
|
+
- name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
|
|
203
|
+
display_name: Llama3.1 70B CPT SEA-LIONv3
|
|
204
|
+
description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesia, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
|
|
205
|
+
creator_organization_name: AI Singapore
|
|
206
|
+
access: open
|
|
207
|
+
num_parameters: 70600000000
|
|
208
|
+
release_date: 2024-12-11
|
|
209
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
210
|
+
|
|
211
|
+
- name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
|
|
212
|
+
display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct
|
|
213
|
+
description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese.
|
|
214
|
+
creator_organization_name: AI Singapore
|
|
215
|
+
access: open
|
|
216
|
+
num_parameters: 70600000000
|
|
217
|
+
release_date: 2024-12-11
|
|
218
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
219
|
+
|
|
166
220
|
# Aleph Alpha
|
|
167
221
|
# Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
|
|
168
222
|
# TODO: add Luminous World when it's released
|
|
@@ -219,7 +273,42 @@ models:
|
|
|
219
273
|
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
220
274
|
|
|
221
275
|
|
|
222
|
-
# Amazon
|
|
276
|
+
# Amazon Nova models
|
|
277
|
+
# References for Amazon Nova models:
|
|
278
|
+
# https://aws.amazon.com/ai/generative-ai/nova/
|
|
279
|
+
- name: amazon/nova-premier-v1:0
|
|
280
|
+
display_name: Amazon Nova Premier
|
|
281
|
+
description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
|
|
282
|
+
creator_organization_name: Amazon
|
|
283
|
+
access: limited
|
|
284
|
+
release_date: 2025-04-30
|
|
285
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
286
|
+
|
|
287
|
+
- name: amazon/nova-pro-v1:0
|
|
288
|
+
display_name: Amazon Nova Pro
|
|
289
|
+
description: Amazon Nova Pro Model
|
|
290
|
+
creator_organization_name: Amazon
|
|
291
|
+
access: limited
|
|
292
|
+
release_date: 2024-12-03
|
|
293
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
294
|
+
|
|
295
|
+
- name: amazon/nova-lite-v1:0
|
|
296
|
+
display_name: Amazon Nova Lite
|
|
297
|
+
description: Amazon Nova Lite Model
|
|
298
|
+
creator_organization_name: Amazon
|
|
299
|
+
access: limited
|
|
300
|
+
release_date: 2024-12-03
|
|
301
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
302
|
+
|
|
303
|
+
- name: amazon/nova-micro-v1:0
|
|
304
|
+
display_name: Amazon Nova Micro
|
|
305
|
+
description: Amazon Nova Micro Model
|
|
306
|
+
creator_organization_name: Amazon
|
|
307
|
+
access: limited
|
|
308
|
+
release_date: 2024-12-03
|
|
309
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
310
|
+
|
|
311
|
+
# Titan Models
|
|
223
312
|
# References for Amazon Titan models:
|
|
224
313
|
# - https://aws.amazon.com/bedrock/titan/
|
|
225
314
|
# - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
|
|
@@ -230,16 +319,8 @@ models:
|
|
|
230
319
|
creator_organization_name: Amazon
|
|
231
320
|
access: limited
|
|
232
321
|
release_date: 2023-11-29
|
|
233
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
234
|
-
|
|
235
|
-
- name: amazon/titan-tg1-large
|
|
236
|
-
display_name: Amazon Titan Large
|
|
237
|
-
description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
|
|
238
|
-
creator_organization_name: Amazon
|
|
239
|
-
access: limited
|
|
240
|
-
release_date: 2023-11-29
|
|
241
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
242
|
-
|
|
322
|
+
tags: [BEDROCK_MODEL_TAG,TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
323
|
+
|
|
243
324
|
- name: amazon/titan-text-express-v1
|
|
244
325
|
display_name: Amazon Titan Text Express
|
|
245
326
|
description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
|
|
@@ -248,6 +329,93 @@ models:
|
|
|
248
329
|
release_date: 2023-11-29
|
|
249
330
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
250
331
|
|
|
332
|
+
# Mistral Models on Bedrock
|
|
333
|
+
# References for Mistral on Amazon Bedrock
|
|
334
|
+
# https://aws.amazon.com/bedrock/mistral/
|
|
335
|
+
|
|
336
|
+
- name: mistralai/amazon-mistral-7b-instruct-v0:2
|
|
337
|
+
display_name: Mistral 7B Instruct on Amazon Bedrock
|
|
338
|
+
description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
|
|
339
|
+
creator_organization_name: Mistral
|
|
340
|
+
access: limited
|
|
341
|
+
release_date: 2024-03-23
|
|
342
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
343
|
+
|
|
344
|
+
- name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
|
|
345
|
+
display_name: Mixtral 8x7B Instruct on Amazon Bedrock
|
|
346
|
+
description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
|
|
347
|
+
creator_organization_name: Mistral
|
|
348
|
+
access: limited
|
|
349
|
+
release_date: 2023-12-11
|
|
350
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
351
|
+
|
|
352
|
+
- name: mistralai/amazon-mistral-large-2402-v1:0
|
|
353
|
+
display_name: Mistral Large(2402) on Amazon Bedrock
|
|
354
|
+
description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
|
|
355
|
+
creator_organization_name: Mistral
|
|
356
|
+
access: limited
|
|
357
|
+
release_date: 2023-07-26
|
|
358
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
359
|
+
|
|
360
|
+
- name: mistralai/amazon-mistral-small-2402-v1:0
|
|
361
|
+
display_name: Mistral Small on Amazon Bedrock
|
|
362
|
+
description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
|
|
363
|
+
creator_organization_name: Mistral
|
|
364
|
+
access: limited
|
|
365
|
+
release_date: 2023-02-26
|
|
366
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
367
|
+
|
|
368
|
+
- name: mistralai/amazon-mistral-large-2407-v1:0
|
|
369
|
+
display_name: Mistral Large(2407) on Amazon Bedrock
|
|
370
|
+
description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling JSON outputting and reasoning capabilities.
|
|
371
|
+
creator_organization_name: Mistral
|
|
372
|
+
access: limited
|
|
373
|
+
release_date: 2024-07-24
|
|
374
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
375
|
+
|
|
376
|
+
# Llama3 on Amazon Bedrock
|
|
377
|
+
# References for Llama3 on Amazon Bedrock
|
|
378
|
+
# https://aws.amazon.com/bedrock/llama/
|
|
379
|
+
|
|
+  - name: meta/amazon-llama3-8b-instruct-v1:0
+    display_name: Llama 3 8B Instruct on Amazon Bedrock
+    description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
+    creator_organization_name: Meta
+    access: limited
+    release_date: 2024-04-23
+    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/amazon-llama3-70b-instruct-v1:0
+    display_name: Llama 3 70B Instruct on Amazon Bedrock
+    description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and enterprise applications.
+    creator_organization_name: Meta
+    access: limited
+    release_date: 2024-04-23
+    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/amazon-llama3-1-405b-instruct-v1:0
+    display_name: Llama 3.1 405B Instruct on Amazon Bedrock
+    description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
+    creator_organization_name: Meta
+    access: limited
+    release_date: 2024-07-26
+    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/amazon-llama3-1-70b-instruct-v1:0
+    display_name: Llama 3.1 70B Instruct on Amazon Bedrock
+    description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
+    creator_organization_name: Meta
+    access: limited
+    release_date: 2024-07-26
+    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/amazon-llama3-1-8b-instruct-v1:0
+    display_name: Llama 3.1 8B Instruct on Amazon Bedrock
+    description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
+    creator_organization_name: Meta
+    access: limited
+    release_date: 2024-07-26
+    tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   # Anthropic
   - name: anthropic/claude-v1.3
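Every model entry added in this release follows the same small schema. A minimal Python sketch of that shape, useful for sanity-checking hand-edited entries; the `ModelEntry` dataclass and `load_entries` helper are hypothetical, with the field set inferred from the entries in this diff rather than taken from HELM's internals:

```python
from dataclasses import dataclass, field
from datetime import date
from typing import List, Optional

import yaml  # pip install pyyaml


@dataclass
class ModelEntry:
    """One entry under `models:`, with the field set seen in this diff (hypothetical helper)."""
    name: str                  # e.g. "meta/amazon-llama3-8b-instruct-v1:0"
    display_name: str
    description: str
    creator_organization_name: str
    access: str                # "open", "limited", or "closed" in this file
    release_date: date         # PyYAML parses unquoted ISO dates into datetime.date
    tags: List[str] = field(default_factory=list)
    num_parameters: Optional[int] = None  # omitted for API-only models


def load_entries(path: str) -> List[ModelEntry]:
    """Parse the metadata file; an unknown key raises TypeError, which fails loudly."""
    with open(path) as f:
        return [ModelEntry(**entry) for entry in yaml.safe_load(f)["models"]]
```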
@@ -315,6 +483,14 @@ models:
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
     tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: anthropic/claude-3-5-haiku-20241022
+    display_name: Claude 3.5 Haiku (20241022)
+    description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-11-04 # Released after the blog post
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: anthropic/claude-3-5-sonnet-20240620
     display_name: Claude 3.5 Sonnet (20240620)
     description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))

@@ -323,6 +499,62 @@ models:
     release_date: 2024-06-20
     tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: anthropic/claude-3-5-sonnet-20241022
+    display_name: Claude 3.5 Sonnet (20241022)
+    description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-10-22
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-7-sonnet-20250219
+    display_name: Claude 3.7 Sonnet (20250219)
+    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)).
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-02-24
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
+    display_name: Claude 3.7 Sonnet (20250219, extended thinking)
+    description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with a budget of 10k tokens.
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-02-24
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-sonnet-4-20250514
+    display_name: Claude 4 Sonnet (20250514)
+    description: Claude 4 Sonnet is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-sonnet-4-20250514-thinking-10k
+    display_name: Claude 4 Sonnet (20250514, extended thinking)
+    description: Claude 4 Sonnet is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with a budget of 10k tokens.
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-opus-4-20250514
+    display_name: Claude 4 Opus (20250514)
+    description: Claude 4 Opus is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-opus-4-20250514-thinking-10k
+    display_name: Claude 4 Opus (20250514, extended thinking)
+    description: Claude 4 Opus is a hybrid model offering two modes: near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with a budget of 10k tokens.
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2025-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: anthropic/stanford-online-all-v4-s3
     display_name: Anthropic-LM v4-s3 (52B)
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
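The `-thinking-10k` suffix on several of the new Claude entries above encodes the extended-thinking budget directly in the model name. A minimal sketch of how such a suffix could be parsed, assuming the convention is exactly `-thinking-<N>k`; the helper is hypothetical, and only the resulting budget value corresponds to the `budget_tokens` parameter of Anthropic's Messages API:

```python
import re
from typing import Optional


def thinking_budget_from_name(model_name: str) -> Optional[int]:
    """Return the extended-thinking token budget encoded in names like
    'anthropic/claude-sonnet-4-20250514-thinking-10k', or None if absent.
    (Hypothetical helper; HELM's real client wiring may differ.)"""
    match = re.search(r"-thinking-(\d+)k$", model_name)
    return int(match.group(1)) * 1000 if match else None


assert thinking_budget_from_name(
    "anthropic/claude-3-7-sonnet-20250219-thinking-10k") == 10_000
assert thinking_budget_from_name("anthropic/claude-3-7-sonnet-20250219") is None
```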
@@ -342,7 +574,7 @@ models:
     access: open
     num_parameters: 13000000000
     release_date: 2022-04-03
-    tags: [] # TODO: add tags
+    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags



@@ -363,7 +595,7 @@ models:
     access: open
     num_parameters: 176000000000
     release_date: 2022-11-03
-    tags: [] # TODO: add tags
+    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags

   - name: bigscience/t0pp
     display_name: T0pp (11B)
@@ -418,7 +650,7 @@ models:
     access: limited
     num_parameters: 6700000000
     release_date: 2023-04-06
-    tags: [] # TODO: add tags
+    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags

   - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
     display_name: Cerebras GPT (13B)
@@ -427,7 +659,7 @@ models:
     access: limited
     num_parameters: 13000000000
     release_date: 2023-04-06
-    tags: [] # TODO: add tags
+    tags: [DEPRECATED_MODEL_TAG] # TODO: add tags



@@ -644,7 +876,7 @@ models:
     access: closed
     num_parameters: 280000000000
     release_date: 2021-12-08
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: deepmind/chinchilla # NOT SUPPORTED
     display_name: Chinchilla (70B)
@@ -653,7 +885,7 @@ models:
     access: closed
     num_parameters: 70000000000
     release_date: 2022-03-31
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]


   # Deepseek
@@ -666,6 +898,62 @@ models:
     release_date: 2024-01-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: deepseek-ai/deepseek-v3
+    display_name: DeepSeek v3
+    description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. It adopts Multi-head Latent Attention (MLA) and the DeepSeekMoE architecture. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
+    creator_organization_name: DeepSeek
+    access: open
+    # NOTE: The total size of the DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights.
+    num_parameters: 685000000000
+    release_date: 2024-12-24
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/deepseek-r1
+    display_name: DeepSeek R1
+    description: DeepSeek R1 is DeepSeek's first-generation reasoning model, which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948))
+    creator_organization_name: DeepSeek
+    access: open
+    # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
+    num_parameters: 685000000000
+    release_date: 2025-01-20
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/deepseek-r1-hide-reasoning
+    display_name: DeepSeek R1 (hide reasoning)
+    description: DeepSeek R1 is DeepSeek's first-generation reasoning model, which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model.
+    creator_organization_name: DeepSeek
+    access: open
+    # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
+    num_parameters: 685000000000
+    release_date: 2025-01-20
+    tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/deepseek-r1-0528
+    display_name: DeepSeek-R1-0528
+    description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948))
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 685000000000
+    release_date: 2025-05-28
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+    display_name: DeepSeek-R1-Distill-Llama-8B
+    description: DeepSeek-R1-Distill-Llama-8B is a dense model distilled from DeepSeek-R1 by fine-tuning the Llama 8B base model on reasoning samples generated by DeepSeek-R1.
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 8000000000
+    release_date: 2025-01-20
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: deepseek-ai/deepseek-coder-6.7b-instruct
+    display_name: DeepSeek-Coder-6.7b-Instruct
+    description: DeepSeek-Coder-6.7b-Instruct is a 6.7B parameter code model fine-tuned from the DeepSeek-Coder base model on instruction data.
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 6740000000
+    release_date: 2025-01-20
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   # EleutherAI
   - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
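The NOTE on the DeepSeek v3 entry explains the `num_parameters` value: the HuggingFace checkpoint bundles the 14B Multi-Token Prediction module on top of the 671B main model. The arithmetic, using only figures quoted in the entries above:

```python
# Figures from the NOTE in the DeepSeek v3 entry above.
main_model = 671_000_000_000   # advertised total parameters
mtp_module = 14_000_000_000    # Multi-Token Prediction (MTP) weights in the checkpoint
activated = 37_000_000_000     # parameters activated per token (MoE routing)

assert main_model + mtp_module == 685_000_000_000  # the num_parameters value used here
print(f"active fraction per token: {activated / main_model:.1%}")  # ~5.5%
```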
@@ -769,7 +1057,7 @@ models:
     access: closed
     num_parameters: 540000000000
     release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
   - name: google/gemini-pro
@@ -819,7 +1107,7 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: google/gemini-1.5-flash-001
     display_name: Gemini 1.5 Flash (001)
@@ -827,7 +1115,7 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2024-05-24
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: google/gemini-1.5-pro-preview-0409
     display_name: Gemini 1.5 Pro (0409 preview)
@@ -885,6 +1173,142 @@ models:
     release_date: 2024-05-24
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: google/gemini-1.5-pro-002
+    display_name: Gemini 1.5 Pro (002)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-09-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-002
+    display_name: Gemini 1.5 Flash (002)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-09-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-exp
+    display_name: Gemini 2.0 Flash (Experimental)
+    description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-12-11
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-8b-001
+    display_name: Gemini 1.5 Flash 8B
+    description: Gemini 1.5 Flash-8B is a small model designed for lower-intelligence tasks. ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-10-01
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-001
+    display_name: Gemini 2.0 Flash
+    description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-01
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-lite-preview-02-05
+    display_name: Gemini 2.0 Flash Lite (02-05 preview)
+    description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-05
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-lite-001
+    display_name: Gemini 2.0 Flash Lite
+    description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-03-25
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-flash-thinking-exp-01-21
+    display_name: Gemini 2.0 Flash Thinking (01-21 preview)
+    description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-01-21
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.0-pro-exp-02-05
+    display_name: Gemini 2.0 Pro (02-05 preview)
+    description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-02-05
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-flash-lite-preview-06-17
+    display_name: Gemini 2.5 Flash-Lite (06-17 preview)
+    description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-06-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-flash-preview-04-17
+    display_name: Gemini 2.5 Flash (04-17 preview)
+    description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-04-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-flash-preview-05-20
+    display_name: Gemini 2.5 Flash (05-20 preview)
+    description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-05-20
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-flash
+    display_name: Gemini 2.5 Flash
+    description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-06-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-pro-exp-03-25
+    display_name: Gemini 2.5 Pro (03-25 experimental)
+    description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-03-25
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-pro-preview-03-25
+    display_name: Gemini 2.5 Pro (03-25 preview)
+    description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-pro-preview-05-06
+    display_name: Gemini 2.5 Pro (05-06 preview)
+    description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-2.5-pro
+    display_name: Gemini 2.5 Pro
+    description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2025-06-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/gemma-2b
     display_name: Gemma (2B)
     description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
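A recurring change in the hunk above is the addition of `AUDIO_LANGUAGE_MODEL_TAG` to the Gemini entries. A small sketch for listing every model that carries a given tag, assuming the file parses as plain YAML (the file path is illustrative):

```python
from typing import List

import yaml  # pip install pyyaml


def models_with_tag(path: str, tag: str) -> List[str]:
    """Names of all entries whose tags list contains `tag`."""
    with open(path) as f:
        entries = yaml.safe_load(f)["models"]
    return [e["name"] for e in entries if tag in e.get("tags", [])]


# e.g. models_with_tag("model_metadata.yaml", "AUDIO_LANGUAGE_MODEL_TAG")
# -> ["google/gemini-1.5-flash-001", "google/gemini-1.5-pro-002", ...]
```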
@@ -1083,6 +1507,60 @@ models:
     release_date: 2023-08-22
     tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+  - name: huggingface/smollm2-135m
+    display_name: SmolLM2 (135M)
+    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 135000000
+    release_date: 2024-10-31
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: huggingface/smollm2-360m
+    display_name: SmolLM2 (360M)
+    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 362000000
+    release_date: 2024-10-31
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: huggingface/smollm2-1.7b
+    display_name: SmolLM2 (1.7B)
+    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 1710000000
+    release_date: 2024-10-31
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: huggingface/smollm2-135m-instruct
+    display_name: SmolLM2 Instruct (135M)
+    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 135000000
+    release_date: 2024-10-31
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: huggingface/smollm2-360m-instruct
+    display_name: SmolLM2 Instruct (360M)
+    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 362000000
+    release_date: 2024-10-31
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: huggingface/smollm2-1.7b-instruct
+    display_name: SmolLM2 Instruct (1.7B)
+    description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 1710000000
+    release_date: 2024-10-31
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   ## Text-to-Image Diffusion Models
   - name: huggingface/dreamlike-diffusion-v1-0
     display_name: Dreamlike Diffusion v1.0 (1B)
@@ -1296,6 +1774,16 @@ models:
     release_date: 2023-06-22
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  # Marin Community
+  - name: marin-community/marin-8b-instruct
+    display_name: Marin 8B Instruct
+    description: Marin 8B Instruct is an open-source 8B parameter instruction-tuned model developed openly by the Marin Community.
+    creator_organization_name: Marin Community
+    access: open
+    num_parameters: 8030000000
+    release_date: 2025-05-15
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # Meta
   - name: meta/opt-iml-175b # NOT SUPPORTED
     display_name: OPT-IML (175B)
@@ -1304,7 +1792,7 @@ models:
     access: open
     num_parameters: 175000000000
     release_date: 2022-12-22
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: meta/opt-iml-30b # NOT SUPPORTED
     display_name: OPT-IML (30B)
@@ -1313,7 +1801,7 @@ models:
     access: open
     num_parameters: 30000000000
     release_date: 2022-12-22
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: meta/opt-175b
     display_name: OPT (175B)
@@ -1360,7 +1848,7 @@ models:
     access: open
     num_parameters: 120000000000
     release_date: 2022-11-15
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: meta/galactica-30b # NOT SUPPORTED
     display_name: Galactica (30B)
@@ -1369,7 +1857,7 @@ models:
     access: open
     num_parameters: 30000000000
     release_date: 2022-11-15
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: meta/llama-7b
     display_name: LLaMA (7B)
@@ -1490,6 +1978,33 @@ models:
     release_date: 2024-07-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: meta/llama-3.1-8b-instruct
+    display_name: Llama 3.1 Instruct (8B)
+    description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-70b-instruct
+    display_name: Llama 3.1 Instruct (70B)
+    description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.1-405b-instruct
+    display_name: Llama 3.1 Instruct (405B)
+    description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 405000000000
+    release_date: 2024-07-23
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: meta/llama-3.1-8b-instruct-turbo
     display_name: Llama 3.1 Instruct Turbo (8B)
     description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1517,6 +2032,15 @@ models:
     release_date: 2024-07-23
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: meta/llama-3.2-1b-instruct
+    display_name: Llama 3.2 Instruct (1.23B)
+    description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) comprises pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 1230000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: meta/llama-3.2-3b-instruct-turbo
     display_name: Llama 3.2 Instruct Turbo (3B)
     description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) comprises pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1533,7 +2057,7 @@ models:
     access: open
     num_parameters: 10700000000
     release_date: 2024-09-25
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3.2-90b-vision-instruct-turbo
     display_name: Llama 3.2 Vision Instruct Turbo (90B)
@@ -1542,7 +2066,43 @@ models:
     access: open
     num_parameters: 88600000000
     release_date: 2024-09-25
-    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.3-70b-instruct-turbo
+    display_name: Llama 3.3 Instruct Turbo (70B)
+    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-12-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.3-70b-instruct
+    display_name: Llama 3.3 Instruct (70B)
+    description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-12-06
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-4-scout-17b-16e-instruct
+    display_name: Llama 4 Scout (17Bx16E) Instruct
+    description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection, a family of natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 109000000000
+    release_date: 2025-04-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-4-maverick-17b-128e-instruct-fp8
+    display_name: Llama 4 Maverick (17Bx128E) Instruct FP8
+    description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection, a family of natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 402000000000
+    release_date: 2025-04-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3-8b-chat
     display_name: Llama 3 Instruct (8B)
@@ -1698,10 +2258,28 @@ models:
     num_parameters: 14000000000
     release_date: 2024-05-21
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
-
-
-
-
+
+  - name: microsoft/phi-3.5-mini-instruct
+    display_name: Phi-3.5-mini-instruct (3.8B)
+    description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon the datasets used for Phi-3: synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 3800000000
+    release_date: 2024-08-22
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: microsoft/phi-3.5-moe-instruct
+    display_name: Phi-3.5 MoE
+    description: Phi-3.5 MoE is a lightweight, state-of-the-art open model built upon the datasets used for Phi-3, synthetic data and filtered publicly available documents, with a focus on very high-quality, reasoning-dense data. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 41900000000
+    release_date: 2024-08-22
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  # KAIST AI
+  - name: kaistai/prometheus-vision-13b-v1.0-hf
+    display_name: LLaVA + Vicuna-v1.5 (13B)
     description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
     creator_organization_name: KAIST AI
     access: open
@@ -1801,6 +2379,42 @@ models:
     release_date: 2024-04-17
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+  - name: allenai/olmo-2-1124-7b-instruct
+    display_name: OLMo 2 7B Instruct November 2024
+    description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7300000000
+    release_date: 2024-11-26
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: allenai/olmo-2-1124-13b-instruct
+    display_name: OLMo 2 13B Instruct November 2024
+    description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 13700000000
+    release_date: 2024-11-26
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: allenai/olmo-2-0325-32b-instruct
+    display_name: OLMo 2 32B Instruct March 2025
+    description: OLMo 2 32B Instruct March 2025 is trained on up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B))
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 32200000000
+    release_date: 2025-03-13
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: allenai/olmoe-1b-7b-0125-instruct
+    display_name: OLMoE 1B-7B Instruct January 2025
+    description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060))
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 6900000000 # ~7B total parameters per the description; ~1B active per token
+    release_date: 2025-03-13
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # Mistral AI
   - name: mistralai/mistral-7b-v0.1
     display_name: Mistral v0.1 (7B)
@@ -1837,6 +2451,15 @@ models:
     num_parameters: 7300000000
     release_date: 2024-05-22
     tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mistral-7b-instruct-v0.3-hf
+    display_name: Mistral Instruct v0.3 (7B)
+    description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.2, v0.3 has an extended vocabulary and supports function calling. ([blog post](https://mistral.ai/news/la-plateforme/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 7300000000
+    release_date: 2024-05-22
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: mistralai/mixtral-8x7b-32kseqlen
     display_name: Mixtral (8x7B 32K seqlen)
@@ -1884,6 +2507,22 @@ models:
     release_date: 2023-10-16
     tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]

+  - name: mistralai/ministral-3b-2410
+    display_name: Ministral 3B (2410)
+    description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)).
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2024-10-16
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/ministral-8b-2410
+    display_name: Ministral 8B (2410)
+    description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases, featuring a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)).
+    creator_organization_name: Mistral AI
+    access: open
+    release_date: 2024-10-16
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mistral-small-2402
     display_name: Mistral Small (2402)
     description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -1892,6 +2531,32 @@ models:
     release_date: 2024-02-26
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: mistralai/mistral-small-2409
+    display_name: Mistral Small (2409)
+    description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2024-09-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mistral-small-2501
+    display_name: Mistral Small 3 (2501)
+    description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks: those that require robust language and instruction-following performance with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 23600000000
+    release_date: 2025-01-30
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mistral-small-2503
+    display_name: Mistral Small 3.1 (2503)
+    description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 23600000000
+    release_date: 2025-03-17
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mistral-medium-2312
     display_name: Mistral Medium (2312)
     description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
@@ -1900,6 +2565,14 @@ models:
     release_date: 2023-12-11
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: mistralai/mistral-medium-2505
+    display_name: Mistral Medium 3 (2505)
+    description: Mistral Medium 3 (2505) is a language model that is intended to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2025-05-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mistral-large-2402
     display_name: Mistral Large (2402)
     description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -1917,6 +2590,15 @@ models:
     release_date: 2023-07-24
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: mistralai/mistral-large-2411
+    display_name: Mistral Large (2411)
+    description: Mistral Large (2411) is a 123B parameter model that has a 128k context window. ([blog](https://mistral.ai/news/pixtral-large/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 123000000000
+    release_date: 2024-11-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/open-mistral-nemo-2407
     display_name: Mistral NeMo (2407)
     description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
@@ -1925,6 +2607,24 @@ models:
     release_date: 2024-07-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: mistralai/pixtral-12b-2409
+    display_name: Mistral Pixtral (2409)
+    description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/))
+    creator_organization_name: Mistral AI
+    access: open
+    release_date: 2024-09-17
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/pixtral-large-2411
+    display_name: Mistral Pixtral Large (2411)
+    description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 124000000000
+    release_date: 2024-11-18
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
   # MosaicML
   - name: mosaicml/mpt-7b
     display_name: MPT (7B)
@@ -1942,7 +2642,7 @@ models:
     access: open
     num_parameters: 6700000000
     release_date: 2023-05-05
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: mosaicml/mpt-instruct-7b
     display_name: MPT-Instruct (7B)
@@ -1969,7 +2669,7 @@ models:
     access: open
     num_parameters: 30000000000
     release_date: 2023-06-22
-    tags: []
+    tags: [UNSUPPORTED_MODEL_TAG]

   - name: mosaicml/mpt-instruct-30b
     display_name: MPT-Instruct (30B)
@@ -1981,6 +2681,27 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]


+
+  # NECTEC
+  - name: nectec/Pathumma-llm-text-1.0.0
+    display_name: Pathumma-llm-text-1.0.0 (7B)
+    description: Pathumma-llm-text-1.0.0 (7B) is an instruction-tuned model based on OpenThaiLLM-Prebuilt-7B. ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
+    creator_organization_name: nectec
+    access: open
+    num_parameters: 7620000000
+    release_date: 2024-10-28
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: nectec/OpenThaiLLM-Prebuilt-7B
+    display_name: OpenThaiLLM-Prebuilt-7B (7B)
+    description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
+    creator_organization_name: nectec
+    access: open
+    num_parameters: 7620000000
+    release_date: 2024-10-28
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+

   # Neurips
   - name: neurips/local
@@ -2010,6 +2731,16 @@ models:
     release_date: 2024-06-17
     tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: nvidia/llama-3.1-nemotron-70b-instruct
+    display_name: Llama 3.1 Nemotron Instruct (70B)
+    description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM-generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
+    creator_organization_name: NVIDIA
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-10-02
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
   # OpenAI

   ## GPT 2 Models
@@ -2194,7 +2925,7 @@ models:
|
|
|
2194
2925
|
tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2195
2926
|
|
|
2196
2927
|
|
|
2197
|
-
## GPT 4
|
|
2928
|
+
## GPT-4 and GPT-4 Turbo
|
|
2198
2929
|
|
|
2199
2930
|
- name: openai/gpt-4-1106-preview
|
|
2200
2931
|
display_name: GPT-4 Turbo (1106 preview)
|
|
@@ -2246,6 +2977,8 @@ models:
|
|
|
2246
2977
|
release_date: 2024-01-25
|
|
2247
2978
|
tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2248
2979
|
|
|
2980
|
+
## GPT-4o
|
|
2981
|
+
|
|
2249
2982
|
- name: openai/gpt-4-turbo-2024-04-09
|
|
2250
2983
|
display_name: GPT-4 Turbo (2024-04-09)
|
|
2251
2984
|
description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
|
|
@@ -2270,6 +3003,14 @@ models:
   release_date: 2024-08-06
   tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+- name: openai/gpt-4o-2024-11-20
+  display_name: GPT-4o (2024-11-20)
+  description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-11-20
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 - name: openai/gpt-4o-mini-2024-07-18
   display_name: GPT-4o mini (2024-07-18)
   description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
@@ -2278,6 +3019,80 @@ models:
   release_date: 2024-07-18
   tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+- name: openai/gpt-4.1-2025-04-14
+  display_name: GPT-4.1 (2025-04-14)
+  description: GPT-4.1 (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. The family also has larger context windows of 1 million tokens and makes better use of that context through improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-14
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/gpt-4.1-mini-2025-04-14
+  display_name: GPT-4.1 mini (2025-04-14)
+  description: GPT-4.1 mini (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. The family also has larger context windows of 1 million tokens and makes better use of that context through improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-14
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/gpt-4.1-nano-2025-04-14
+  display_name: GPT-4.1 nano (2025-04-14)
+  description: GPT-4.1 nano (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. The family also has larger context windows of 1 million tokens and makes better use of that context through improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-14
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/whisper-1_gpt-4o-2024-11-20
+  display_name: Whisper-1 + GPT-4o (2024-11-20)
+  description: Transcribes the audio with Whisper-1 and then uses GPT-4o to generate a response.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-11-20
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
+
+- name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
+  display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
+  description: Transcribes the audio with GPT-4o Transcribe and then uses GPT-4o to generate a response.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-03-20
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
+
+- name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
+  display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
+  description: Transcribes the audio with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-03-20
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
+
+- name: openai/gpt-4o-audio-preview-2024-10-01
+  display_name: GPT-4o Audio (Preview 2024-10-01)
+  description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-10-01
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/gpt-4o-audio-preview-2024-12-17
+  display_name: GPT-4o Audio (Preview 2024-12-17)
+  description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-12-17
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/gpt-4o-mini-audio-preview-2024-12-17
+  display_name: GPT-4o mini Audio (Preview 2024-12-17)
+  description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-12-17
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# GPT-4V
+
 - name: openai/gpt-4-vision-preview
   # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model points to gpt-4-1106-vision-preview.
   display_name: GPT-4V (1106 preview)
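Names like `openai/whisper-1_gpt-4o-2024-11-20` above encode a two-stage pipeline: the transcription model, an underscore, then the chat model that answers. A rough sketch of that flow against the OpenAI API is below; the two endpoints (`audio.transcriptions` and `chat.completions`) are standard, while the helper and the underscore-splitting convention are our reading of the names, not HELM's actual client code.

```python
# Sketch of the transcribe-then-respond pipelines named above, e.g.
# "whisper-1_gpt-4o-2024-11-20". The two OpenAI endpoints are real;
# the helper and its wiring are illustrative assumptions, not HELM's code.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment


def transcribe_then_respond(audio_path: str, pipeline_name: str) -> str:
    # Split "whisper-1_gpt-4o-2024-11-20" into its two stages at the
    # first underscore (the stage names themselves contain no underscores).
    transcriber, responder = pipeline_name.split("_", 1)

    with open(audio_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model=transcriber,  # e.g. "whisper-1" or "gpt-4o-transcribe"
            file=audio_file,
        )

    # Feed the transcript to the chat model as an ordinary text prompt.
    response = client.chat.completions.create(
        model=responder,  # e.g. "gpt-4o-2024-11-20"
        messages=[{"role": "user", "content": transcript.text}],
    )
    return response.choices[0].message.content
```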
@@ -2295,7 +3110,64 @@ models:
   release_date: 2023-11-06
   tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+## GPT-4.5
+- name: openai/gpt-4.5-preview-2025-02-27
+  display_name: GPT-4.5 (2025-02-27 preview)
+  description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). ([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-02-27
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 ## o1 Models
+- name: openai/o1-pro-2025-03-19
+  display_name: o1 pro (2025-03-19)
+  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-03-19
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o1-pro-2025-03-19-low-reasoning-effort
+  display_name: o1 pro (2025-03-19, low reasoning effort)
+  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to low.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-03-19
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o1-pro-2025-03-19-high-reasoning-effort
+  display_name: o1 pro (2025-03-19, high reasoning effort)
+  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to high.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-03-19
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o1-2024-12-17
+  display_name: o1 (2024-12-17)
+  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-12-17
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o1-2024-12-17-low-reasoning-effort
+  display_name: o1 (2024-12-17, low reasoning effort)
+  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to low.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-12-17
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o1-2024-12-17-high-reasoning-effort
+  display_name: o1 (2024-12-17, high reasoning effort)
+  description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to high.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2024-12-17
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 - name: openai/o1-preview-2024-09-12
   display_name: o1-preview (2024-09-12)
   description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
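The `-low-reasoning-effort` and `-high-reasoning-effort` suffixes above do not name distinct models; per the descriptions, they pin the request's reasoning effort. A minimal sketch of that mapping, assuming the standard OpenAI chat client (`reasoning_effort` is a real o-series request parameter; the suffix parsing itself is our illustration, not HELM's actual client code):

```python
# Illustrative mapping from the suffixed names above to API requests.
# `reasoning_effort` is a real parameter on OpenAI's o-series chat API;
# the suffix parsing is our reading of the names, not HELM's code.
from openai import OpenAI

client = OpenAI()


def ask(model_name: str, prompt: str) -> str:
    kwargs = {}
    for effort in ("low", "high"):
        suffix = f"-{effort}-reasoning-effort"
        if model_name.endswith(suffix):
            model_name = model_name.removesuffix(suffix)
            kwargs["reasoning_effort"] = effort  # API default is "medium"
            break

    response = client.chat.completions.create(
        model=model_name,  # e.g. "o1-2024-12-17"
        messages=[{"role": "user", "content": prompt}],
        **kwargs,
    )
    return response.choices[0].message.content
```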
@@ -2312,6 +3184,78 @@ models:
   release_date: 2024-09-12
   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+- name: openai/o3-mini-2025-01-31
+  display_name: o3-mini (2025-01-31)
+  description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-01-31
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o3-mini-2025-01-31-low-reasoning-effort
+  display_name: o3-mini (2025-01-31, low reasoning effort)
+  description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The request's reasoning effort parameter is set to low.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-01-31
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o3-mini-2025-01-31-high-reasoning-effort
+  display_name: o3-mini (2025-01-31, high reasoning effort)
+  description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The request's reasoning effort parameter is set to high.
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-01-31
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o3-2025-04-16
+  display_name: o3 (2025-04-16)
+  description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o3-2025-04-16-low-reasoning-effort
+  display_name: o3 (2025-04-16, low reasoning effort)
+  description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o3-2025-04-16-high-reasoning-effort
+  display_name: o3 (2025-04-16, high reasoning effort)
+  description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o4-mini-2025-04-16
+  display_name: o4-mini (2025-04-16)
+  description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o4-mini-2025-04-16-low-reasoning-effort
+  display_name: o4-mini (2025-04-16, low reasoning effort)
+  description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: openai/o4-mini-2025-04-16-high-reasoning-effort
+  display_name: o4-mini (2025-04-16, high reasoning effort)
+  description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
+  creator_organization_name: OpenAI
+  access: limited
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 ## Codex Models
 # DEPRECATED: Codex models have been shut down on March 23 2023.

@@ -2556,6 +3500,47 @@ models:
   release_date: 2024-06-07
   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+- name: qwen/qwen2.5-7b-instruct-turbo
+  display_name: Qwen2.5 Instruct Turbo (7B)
+  description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens, supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+  creator_organization_name: Qwen
+  access: open
+  release_date: 2024-09-19
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: qwen/qwen2.5-7b-instruct
+  display_name: Qwen2.5 Instruct (7B)
+  description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens, supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/))
+  creator_organization_name: Qwen
+  access: open
+  release_date: 2024-09-19
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: qwen/qwen2.5-72b-instruct-turbo
+  display_name: Qwen2.5 Instruct Turbo (72B)
+  description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens, supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+  creator_organization_name: Qwen
+  access: open
+  release_date: 2024-09-19
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: qwen/qwen3-235b-a22b-fp8-tput
+  display_name: Qwen3 235B A22B FP8 Throughput
+  description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)).
+  creator_organization_name: Qwen
+  access: open
+  release_date: 2025-04-29
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: qwen/qwq-32b-preview
+  display_name: QwQ (32B Preview)
+  description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/))
+  creator_organization_name: Alibaba Cloud
+  access: open
+  num_parameters: 32800000000
+  release_date: 2024-11-28
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 - name: qwen/qwen-vl
   display_name: Qwen-VL
   description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
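The `num_parameters` values throughout this file are raw integers (e.g. `32800000000` for QwQ above), while display names carry `(32B)`-style suffixes. A small sketch of that conversion; the function and its rounding rules are our own illustration, not HELM's:

```python
# Turn raw num_parameters values into the "(7B)" style used in display names.
# The rounding rules here are an illustrative choice, not HELM's.
def humanize_parameters(num_parameters: int) -> str:
    for threshold, unit in ((10**9, "B"), (10**6, "M"), (10**3, "K")):
        if num_parameters >= threshold:
            value = num_parameters / threshold
            # "7620000000" -> "7.6B", "70000000000" -> "70B"
            text = f"{value:.1f}".rstrip("0").rstrip(".")
            return f"{text}{unit}"
    return str(num_parameters)


assert humanize_parameters(7620000000) == "7.6B"
assert humanize_parameters(32800000000) == "32.8B"
assert humanize_parameters(70000000000) == "70B"
```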
@@ -2572,6 +3557,78 @@ models:
   release_date: 2023-08-24
   tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+- name: qwen/qwen2-vl-7b-instruct
+  display_name: Qwen2-VL Instruct (7B)
+  description: The second generation of Qwen-VL models ([paper](https://arxiv.org/abs/2409.12191)).
+  creator_organization_name: Alibaba Group
+  access: open
+  release_date: 2024-08-29
+  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: qwen/qwen2-vl-72b-instruct
+  display_name: Qwen2-VL Instruct (72B)
+  description: The second generation of Qwen-VL models ([paper](https://arxiv.org/abs/2409.12191)).
+  creator_organization_name: Alibaba Group
+  access: open
+  release_date: 2024-08-29
+  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: qwen/qwen2.5-vl-3b-instruct
+  display_name: Qwen2.5-VL Instruct (3B)
+  description: The latest generation of Qwen-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+  creator_organization_name: Alibaba Group
+  access: open
+  release_date: 2025-01-26
+  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: qwen/qwen2.5-vl-7b-instruct
+  display_name: Qwen2.5-VL Instruct (7B)
+  description: The latest generation of Qwen-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+  creator_organization_name: Alibaba Group
+  access: open
+  release_date: 2025-01-26
+  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: qwen/qwen2.5-vl-32b-instruct
+  display_name: Qwen2.5-VL Instruct (32B)
+  description: The latest generation of Qwen-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+  creator_organization_name: Alibaba Group
+  access: open
+  release_date: 2025-01-26
+  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: qwen/qwen2.5-vl-72b-instruct
+  display_name: Qwen2.5-VL Instruct (72B)
+  description: The latest generation of Qwen-VL models ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
+  creator_organization_name: Alibaba Group
+  access: open
+  release_date: 2025-01-26
+  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: qwen/qwen-audio-chat
+  display_name: Qwen-Audio Chat
+  description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
+  creator_organization_name: Alibaba Cloud
+  access: open
+  release_date: 2023-11-14
+  tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+- name: qwen/qwen2-audio-7b-instruct
+  display_name: Qwen2-Audio Instruct (7B)
+  description: The second generation of the auditory multimodal Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)).
+  creator_organization_name: Alibaba Cloud
+  access: open
+  release_date: 2024-07-15
+  tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+- name: qwen/qwen2.5-omni-7b
+  display_name: Qwen2.5-Omni (7B)
+  description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)).
+  creator_organization_name: Alibaba Cloud
+  access: open
+  release_date: 2025-03-27
+  tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
 # SAIL (Sea AI Lab)
 - name: sail/sailor-7b
   display_name: Sailor (7B)
@@ -2617,7 +3674,7 @@ models:
   access: open
   num_parameters: 16000000000
   release_date: 2022-03-25
-  tags: []
+  tags: [UNSUPPORTED_MODEL_TAG]

 # SambaNova
 - name: sambanova/sambalingo-thai-base
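Several hunks in this release replace empty `tags: []` lists with `UNSUPPORTED_MODEL_TAG`, which makes the unsupported status machine-readable instead of implicit. A consumer can then filter on it; a short sketch reusing the illustrative `ModelEntry` loader from earlier (the tag names come from this diff, the filtering helpers are ours):

```python
# Filter out entries that 0.5.6 now explicitly marks as unsupported.
# Reuses the illustrative ModelEntry/load_entries sketch from earlier.
def supported_models(entries):
    return [e for e in entries if "UNSUPPORTED_MODEL_TAG" not in e.tags]


def models_with_tag(entries, tag: str):
    # e.g. models_with_tag(entries, "AUDIO_LANGUAGE_MODEL_TAG")
    return [e.name for e in entries if tag in e.tags]
```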
@@ -2769,8 +3826,6 @@ models:
   release_date: 2023-04-20
   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

-
-
 # Stanford
 - name: stanford/alpaca-7b
   display_name: Alpaca (7B)
@@ -2866,7 +3921,7 @@ models:
   access: open
   num_parameters: 3000000000
   release_date: 2023-05-05
-
+  tags: [UNSUPPORTED_MODEL_TAG]

 - name: together/redpajama-incite-base-7b
   display_name: RedPajama-INCITE-Base (7B)
@@ -2917,9 +3972,27 @@ models:
   access: open
   num_parameters: 13000000000
   release_date: 2022-09-19
-  tags: []
+  tags: [UNSUPPORTED_MODEL_TAG]

+# Upstage
+- name: upstage/solar-pro-preview-instruct
+  display_name: Solar Pro Preview (22B)
+  description: Solar Pro Preview (22B) is an open-weights model for single-GPU inference and a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)).
+  creator_organization_name: Upstage
+  access: open
+  num_parameters: 22000000000
+  release_date: 2024-09-11
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

+- name: upstage/solar-pro-241126
+  display_name: Solar Pro
+  description: Solar Pro is an LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws))
+  creator_organization_name: Upstage
+  access: limited
+  num_parameters: 22000000000
+  release_date: 2024-11-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

 # Writer
 - name: writer/palmyra-base
@@ -3030,6 +4103,14 @@ models:
   release_date: 2024-09-12
   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+- name: writer/palmyra-x5
+  display_name: Palmyra X5
+  description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
+  creator_organization_name: Writer
+  access: limited
+  release_date: 2024-04-28
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 - name: writer/palmyra-med-32k
   display_name: Palmyra-Med 32K (70B)
   description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
@@ -3040,11 +4121,10 @@ models:
   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

 - name: writer/palmyra-med
-  display_name: Palmyra
-  description: Palmyra
+  display_name: Palmyra Med
+  description: Palmyra Med is a model intended for medical applications.
   creator_organization_name: Writer
   access: open
-  num_parameters: 70600000000
   release_date: 2024-07-31
   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

@@ -3057,6 +4137,32 @@ models:
   release_date: 2024-07-31
   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+- name: writer/palmyra-fin
+  display_name: Palmyra Fin
+  description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
+  creator_organization_name: Writer
+  access: limited
+  release_date: 2024-07-31
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# xAI
+
+- name: xai/grok-3-beta
+  display_name: Grok 3 Beta
+  description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
+  creator_organization_name: xAI
+  access: limited
+  release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: xai/grok-3-mini-beta
+  display_name: Grok 3 mini Beta
+  description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
+  creator_organization_name: xAI
+  access: limited
+  release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 # Yandex
 - name: yandex/yalm
   display_name: YaLM (100B)
@@ -3128,3 +4234,452 @@ models:
   release_date: 2024-04-18
   tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

+# Diva Llama
+- name: stanford/diva-llama
+  display_name: Diva Llama 3 (8B)
+  description: Diva Llama 3 is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It was trained using distillation loss. ([paper](https://arxiv.org/abs/2410.02678))
+  creator_organization_name: Stanford
+  access: open
+  num_parameters: 8000000000
+  release_date: 2024-10-03
+  tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+
+# LLaMA-Omni
+- name: ictnlp/llama-3.1-8b-omni
+  display_name: LLaMA-Omni (8B)
+  description: The speech-language multimodal version of the LLaMA 3.1 model ([paper](https://arxiv.org/abs/2409.06666)).
+  creator_organization_name: ICTNLP
+  access: open
+  num_parameters: 8000000000
+  release_date: 2024-09-10
+  tags: [AUDIO_LANGUAGE_MODEL_TAG]
+
+
+# Maritaca AI
+- name: maritaca-ai/sabia-7b
+  display_name: Sabia 7B
+  description: Sabia 7B is a Portuguese large language model from Maritaca AI.
+  creator_organization_name: MARITACA-AI
+  access: open
+  num_parameters: 6740000000
+  release_date: 2023-11-08
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Granite - IBM
+# https://www.ibm.com/granite
+# https://github.com/ibm-granite/granite-3.0-language-models
+
+- name: ibm-granite/granite-3.0-2b-base
+  display_name: Granite 3.0 base (2B)
+  description: Granite-3.0-2B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 2530000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-2b-instruct
+  display_name: Granite 3.0 Instruct (2B)
+  description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 2630000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-8b-instruct
+  display_name: Granite 3.0 instruct (8B)
+  description: Granite-3.0-8B-Instruct is an 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 8170000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-8b-base
+  display_name: Granite 3.0 base (8B)
+  description: Granite-3.0-8B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 8170000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-3b-a800m-instruct
+  display_name: Granite 3.0 A800M instruct (3B)
+  description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 3370000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-3b-a800m-base
+  display_name: Granite 3.0 A800M base (3B)
+  description: Granite-3.0-3B-A800M-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 3370000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-1b-a400m-instruct
+  display_name: Granite 3.0 A400M instruct (1B)
+  description: Granite-3.0-1B-A400M-Instruct is a 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 1330000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.0-1b-a400m-base
+  display_name: Granite 3.0 A400M base (1B)
+  description: Granite-3.0-1B-A400M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 1380000000
+  release_date: 2024-10-21
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-8b-base
+  display_name: Granite 3.1 - 8B - Base
+  description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+  creator_organization_name: IBM-GRANITE
+  access: open
+  num_parameters: 8170000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-8b-instruct
+  display_name: Granite 3.1 - 8B - Instruct
+  description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 8170000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-2b-instruct
+  display_name: Granite 3.1 - 2B - Instruct
+  description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 2530000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-2b-base
+  display_name: Granite 3.1 - 2B - Base
+  description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+  creator_organization_name: IBM-GRANITE
+  access: open
+  num_parameters: 2530000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-3b-a800m-instruct
+  display_name: Granite 3.1 - 3B - A800M - Instruct
+  description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+  creator_organization_name: IBM-GRANITE
+  access: open
+  num_parameters: 3300000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-3b-a800m-base
+  display_name: Granite 3.1 - 3B - A800M - Base
+  description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+  creator_organization_name: IBM-GRANITE
+  access: open
+  num_parameters: 3300000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-1b-a400m-instruct
+  display_name: Granite 3.1 - 1B - A400M - Instruct
+  description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+  creator_organization_name: IBM-GRANITE
+  access: open
+  num_parameters: 1330000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm-granite/granite-3.1-1b-a400m-base
+  display_name: Granite 3.1 - 1B - A400M - Base
+  description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K.
+  creator_organization_name: IBM-GRANITE
+  access: open
+  num_parameters: 1330000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG]
+
+- name: ibm/granite-13b-instruct-v2
+  display_name: Granite 13b instruct v2
+  description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model.
+  creator_organization_name: IBM
+  access: limited
+  num_parameters: 13000000000
+  release_date: 2023-11-30
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm/granite-20b-code-instruct-8k
+  display_name: Granite 20b code instruct (8K)
+  description: Granite-20B-Code-Base-8K is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, the model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, the model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve its ability to reason and follow instructions.
+  creator_organization_name: IBM
+  access: limited
+  num_parameters: 20000000000
+  release_date: 2024-04-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm/granite-34b-code-instruct
+  display_name: Granite 34b code instruct
+  description: Granite Base (34B) Code Instruct is a 34B parameter model fine-tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 34000000000
+  release_date: 2024-06-05
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+- name: ibm/granite-3b-code-instruct
+  display_name: Granite 3b code instruct
+  description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine-tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 3000000000
+  release_date: 2024-06-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm/granite-8b-code-instruct
+  display_name: Granite 8b code instruct
+  description: Granite-8B-Code-Instruct-128K is an 8B parameter long-context instruct model fine-tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 8000000000
+  release_date: 2024-06-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm/granite-3.1-8b-instruct
+  display_name: Granite 3.1 - 8B - Instruct
+  description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 8170000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm/granite-3.1-2b-instruct
+  display_name: Granite 3.1 - 2B - Instruct
+  description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 2530000000
+  release_date: 2024-12-18
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ibm/granite-3.3-8b-instruct
+  display_name: Granite 3.3 8B Instruct
+  description: Granite 3.3 8B Instruct is an 8-billion-parameter, 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
+  creator_organization_name: IBM
+  access: open
+  num_parameters: 8170000000
+  release_date: 2025-04-16
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
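Dates in this file are ISO `YYYY-MM-DD` strings, and values that swap month and day will fail ISO parsing, so the field is easy to check mechanically. A short validation sketch under the same assumptions as the earlier loader:

```python
# Sanity-check release dates in entries like the ones above.
# Assumes the illustrative load_entries() sketch from earlier in this diff.
import datetime


def check_release_dates(entries) -> None:
    for entry in entries:
        if entry.release_date is None:
            continue
        try:
            # Accepts only zero-padded ISO dates such as "2024-04-18";
            # a month/day swap like "2024-18-04" raises ValueError.
            datetime.date.fromisoformat(str(entry.release_date))
        except ValueError:
            print(f"{entry.name}: bad release_date {entry.release_date!r}")
```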
+- name: mistralai/mixtral-8x7b-instruct-v0:1
+  display_name: Mixtral 8x7B Instruct on IBM WatsonX
+  description: An 8x7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages and code, with a 32k context window.
+  creator_organization_name: Mistral
+  access: limited
+  release_date: 2023-12-11
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/ura-llama-2.1-8b
+  display_name: URA-Llama 2.1 (8B)
+  description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 8000000000
+  release_date: 2024-08-04
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/ura-llama-2-8b
+  display_name: URA-Llama 2 (8B)
+  description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 8000000000
+  release_date: 2024-08-04
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/ura-llama-7b
+  display_name: URA-Llama 7B (7B)
+  description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 7000000000
+  release_date: 2023-10-10
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/ura-llama-13b
+  display_name: URA-Llama 13B (13B)
+  description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 13000000000
+  release_date: 2023-10-10
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/ura-llama-70b
+  display_name: URA-Llama 70B (70B)
+  description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 70000000000
+  release_date: 2023-10-10
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/GemSUra-7B
+  display_name: GemSUra 7B
+  description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-03-12
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/GemSUra-2B
+  display_name: GemSUra 2B
+  description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 2000000000
+  release_date: 2024-03-12
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: ura-hcmut/MixSUra
+  display_name: MixSUra
+  description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a mixture-of-experts model with 8 experts.
+  creator_organization_name: URA
+  access: open
+  num_parameters: 46700000000
+  release_date: 2024-03-12
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/vinallama-7b-chat
+  display_name: VinaLLaMa
+  description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-03-12
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/vinallama-2.7b-chat
+  display_name: VinaLLaMa 2.7B
+  description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 2700000000
+  release_date: 2024-03-12
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/vietcuna-7b-v3
+  display_name: VietCuna 7B (v3)
+  description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 7000000000
+  release_date: 2023-08-07
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/vietcuna-3b-v2
+  display_name: VietCuna 3B (v2)
+  description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 3000000000
+  release_date: 2023-07-15
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/Quyen-v0.1
+  display_name: Quyen (v0.1)
+  description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 4000000000
+  release_date: 2024-02-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/Quyen-Plus-v0.1
+  display_name: Quyen Plus (v0.1)
+  description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-02-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/Quyen-Pro-v0.1
+  display_name: Quyen Pro (v0.1)
+  description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 14000000000
+  release_date: 2024-02-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/Quyen-Pro-Max-v0.1
+  display_name: Quyen Pro Max (v0.1)
+  description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 72000000000
+  release_date: 2024-02-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/Quyen-Mini-v0.1
+  display_name: Quyen Mini (v0.1)
+  description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 1800000000
+  release_date: 2024-02-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: vilm/Quyen-SE-v0.1
+  display_name: Quyen SE (v0.1)
+  description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: ViLM
+  access: open
+  num_parameters: 500000000
+  release_date: 2024-02-26
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: Viet-Mistral/Vistral-7B-Chat
+  display_name: Vistral 7B Chat
+  description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
+  creator_organization_name: Viet-Mistral
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-02-28
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
|
+
- name: vinai/PhoGPT-7B5-Instruct
|
|
4670
|
+
display_name: PhoGPT 7B5 Instruct
|
|
4671
|
+
description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
|
|
4672
|
+
creator_organization_name: VinAI
|
|
4673
|
+
access: open
|
|
4674
|
+
num_parameters: 7500000000
|
|
4675
|
+
release_date: 2024-02-19
|
|
4676
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
4677
|
+
|
|
4678
|
+
- name: vinai/PhoGPT-4B-Chat
|
|
4679
|
+
display_name: PhoGPT 4B Chat
|
|
4680
|
+
description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
|
|
4681
|
+
creator_organization_name: VinAI
|
|
4682
|
+
access: open
|
|
4683
|
+
num_parameters: 4000000000
|
|
4684
|
+
release_date: 2024-04-02
|
|
4685
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
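For readers who want to inspect these additions programmatically, the following is a minimal sketch of loading and filtering entries like the ones above with PyYAML. It assumes the entries sit under a top-level `models:` key in a file at `helm/config/model_metadata.yaml` (the path and key layout here are assumptions based on HELM's packaged config, not something this diff guarantees); adjust the path for your installation.

# Minimal sketch: load the model metadata entries above and filter by tag.
# ASSUMPTIONS: file path and top-level `models:` key as described in the
# lead-in; requires PyYAML (`pip install pyyaml`).
from typing import Any, Dict, List

import yaml

METADATA_PATH = "helm/config/model_metadata.yaml"  # assumed location


def load_models(path: str = METADATA_PATH) -> List[Dict[str, Any]]:
    """Parse the YAML file and return the list of model entries."""
    with open(path) as f:
        return yaml.safe_load(f)["models"]


def models_with_tag(models: List[Dict[str, Any]], tag: str) -> List[Dict[str, Any]]:
    """Return entries whose `tags` list contains the given tag string."""
    return [m for m in models if tag in m.get("tags", [])]


if __name__ == "__main__":
    for m in models_with_tag(load_models(), "INSTRUCTION_FOLLOWING_MODEL_TAG"):
        params = m.get("num_parameters")
        size = f"{params / 1e9:.1f}B" if params else "?"
        print(f"{m['name']}: {m.get('display_name')} ({size})")

Note that HELM itself deserializes these entries through its model metadata registry rather than handling raw dicts, and that `num_parameters` records total parameters: for a mixture-of-experts entry such as MixSUra (46.7 billion, consistent with a Mixtral-8x7B-style architecture), the number of parameters active per token is considerably smaller than this total.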