crfm-helm 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm has been flagged as potentially problematic; consult the package registry's advisory page for details.
- crfm_helm-0.5.5.dist-info/METADATA +413 -0
- crfm_helm-0.5.5.dist-info/RECORD +894 -0
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +13 -1
- helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
- helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
- helm/benchmark/adaptation/common_adapter_specs.py +69 -4
- helm/benchmark/adaptation/prompt.py +1 -1
- helm/benchmark/annotation/aci_bench_annotator.py +95 -0
- helm/benchmark/annotation/air_bench_annotator.py +20 -5
- helm/benchmark/annotation/annotator.py +5 -0
- helm/benchmark/annotation/annotator_factory.py +3 -20
- helm/benchmark/annotation/anthropic_red_team_annotator.py +11 -24
- helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
- helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
- helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
- helm/benchmark/annotation/bird_sql_annotator.py +58 -0
- helm/benchmark/annotation/call_center_annotator.py +22 -11
- helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
- helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
- helm/benchmark/annotation/dischargeme_annotator.py +107 -0
- helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
- helm/benchmark/annotation/harm_bench_annotator.py +11 -24
- helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
- helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
- helm/benchmark/annotation/live_qa_annotator.py +10 -5
- helm/benchmark/annotation/med_dialog_annotator.py +99 -0
- helm/benchmark/annotation/medalign_annotator.py +100 -0
- helm/benchmark/annotation/medi_qa_annotator.py +98 -0
- helm/benchmark/annotation/medication_qa_annotator.py +90 -61
- helm/benchmark/annotation/mental_health_annotator.py +98 -0
- helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
- helm/benchmark/annotation/model_as_judge.py +281 -18
- helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
- helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
- helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
- helm/benchmark/annotation/omni_math_annotator.py +132 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +11 -25
- helm/benchmark/annotation/spider_annotator.py +18 -0
- helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
- helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
- helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
- helm/benchmark/annotation/wildbench_annotator.py +119 -0
- helm/benchmark/annotation/xstest_annotator.py +20 -30
- helm/benchmark/annotation_executor.py +35 -15
- helm/benchmark/augmentations/cleva_perturbation.py +9 -8
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
- helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
- helm/benchmark/augmentations/dialect_perturbation.py +4 -5
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +2 -2
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +4 -5
- helm/benchmark/augmentations/perturbation.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +2 -2
- helm/benchmark/augmentations/synonym_perturbation.py +4 -3
- helm/benchmark/augmentations/test_perturbation.py +16 -13
- helm/benchmark/augmentations/translate_perturbation.py +2 -2
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/data_preprocessor.py +2 -2
- helm/benchmark/huggingface_registration.py +2 -7
- helm/benchmark/metrics/aci_bench_metrics.py +34 -0
- helm/benchmark/metrics/basic_metrics.py +6 -6
- helm/benchmark/metrics/bbq_metrics.py +2 -2
- helm/benchmark/metrics/bias_metrics.py +12 -3
- helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
- helm/benchmark/metrics/bird_sql_metrics.py +28 -0
- helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
- helm/benchmark/metrics/classification_metrics.py +76 -12
- helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
- helm/benchmark/metrics/code_metrics.py +5 -5
- helm/benchmark/metrics/comet_metric.py +125 -0
- helm/benchmark/metrics/common_metric_specs.py +9 -2
- helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
- helm/benchmark/metrics/copyright_metrics.py +4 -4
- helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
- helm/benchmark/metrics/dischargeme_metrics.py +34 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -4
- helm/benchmark/metrics/dry_run_metrics.py +5 -5
- helm/benchmark/metrics/efficiency_metrics.py +3 -3
- helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
- helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
- helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
- helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
- helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
- helm/benchmark/metrics/ifeval/instructions.py +1574 -0
- helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
- helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
- helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
- helm/benchmark/metrics/ifeval_metrics.py +55 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
- helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
- helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
- helm/benchmark/metrics/language_modeling_metrics.py +4 -4
- helm/benchmark/metrics/machine_translation_metrics.py +2 -2
- helm/benchmark/metrics/med_dialog_metrics.py +34 -0
- helm/benchmark/metrics/medalign_metrics.py +34 -0
- helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
- helm/benchmark/metrics/medec_metrics.py +101 -0
- helm/benchmark/metrics/medi_qa_metrics.py +34 -0
- helm/benchmark/metrics/medication_qa_metrics.py +15 -4
- helm/benchmark/metrics/mental_health_metrics.py +34 -0
- helm/benchmark/metrics/metric.py +3 -3
- helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
- helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
- helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
- helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
- helm/benchmark/metrics/nltk_helper.py +32 -0
- helm/benchmark/metrics/numeracy_metrics.py +4 -4
- helm/benchmark/metrics/omni_math_metrics.py +32 -0
- helm/benchmark/metrics/output_processing_metric.py +60 -0
- helm/benchmark/metrics/output_processors.py +15 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
- helm/benchmark/metrics/ranking_metrics.py +3 -3
- helm/benchmark/metrics/reference_metric.py +3 -3
- helm/benchmark/metrics/safety_metrics.py +39 -17
- helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
- helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
- helm/benchmark/metrics/spider_metrics.py +7 -0
- helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
- helm/benchmark/metrics/statistic.py +1 -1
- helm/benchmark/metrics/summac/model_summac.py +1 -1
- helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
- helm/benchmark/metrics/summarization_metrics.py +19 -9
- helm/benchmark/metrics/test_bias_metrics.py +5 -1
- helm/benchmark/metrics/test_classification_metrics.py +140 -68
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
- helm/benchmark/metrics/test_metric.py +1 -1
- helm/benchmark/metrics/test_statistic.py +2 -2
- helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
- helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/toxicity_metrics.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +21 -4
- helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
- helm/benchmark/metrics/wildbench_metrics.py +34 -0
- helm/benchmark/model_metadata_registry.py +16 -0
- helm/benchmark/presentation/create_plots.py +1 -1
- helm/benchmark/presentation/schema.py +3 -0
- helm/benchmark/presentation/summarize.py +119 -256
- helm/benchmark/presentation/test_summarize.py +145 -3
- helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
- helm/benchmark/reeval_run.py +203 -0
- helm/benchmark/reeval_runner.py +355 -0
- helm/benchmark/run.py +8 -17
- helm/benchmark/run_expander.py +105 -8
- helm/benchmark/run_spec_factory.py +12 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
- helm/benchmark/run_specs/audio_run_specs.py +613 -0
- helm/benchmark/run_specs/call_center_run_specs.py +49 -0
- helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
- helm/benchmark/run_specs/classic_run_specs.py +1 -69
- helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
- helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
- helm/benchmark/run_specs/experimental_run_specs.py +112 -3
- helm/benchmark/run_specs/finance_run_specs.py +6 -2
- helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
- helm/benchmark/run_specs/lite_run_specs.py +2 -2
- helm/benchmark/run_specs/long_context_run_specs.py +89 -0
- helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
- helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
- helm/benchmark/run_specs/oab_exams_specs.py +32 -0
- helm/benchmark/run_specs/safety_run_specs.py +37 -0
- helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +66 -52
- helm/benchmark/run_specs/sql_run_specs.py +54 -0
- helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
- helm/benchmark/run_specs/vlm_run_specs.py +83 -5
- helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
- helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
- helm/benchmark/scenarios/air_bench_scenario.py +6 -1
- helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
- helm/benchmark/scenarios/audio_language/__init__.py +0 -0
- helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
- helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
- helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
- helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
- helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
- helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
- helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
- helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
- helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
- helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
- helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
- helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
- helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
- helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
- helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
- helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
- helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
- helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
- helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
- helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
- helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
- helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
- helm/benchmark/scenarios/banking77_scenario.py +6 -1
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/big_bench_scenario.py +11 -1
- helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
- helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
- helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
- helm/benchmark/scenarios/blimp_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +1 -1
- helm/benchmark/scenarios/boolq_scenario.py +1 -1
- helm/benchmark/scenarios/casehold_scenario.py +79 -0
- helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
- helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
- helm/benchmark/scenarios/clear_scenario.py +153 -0
- helm/benchmark/scenarios/cleva_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +17 -4
- helm/benchmark/scenarios/commonsense_scenario.py +1 -1
- helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
- helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
- helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
- helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
- helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
- helm/benchmark/scenarios/disinformation_scenario.py +10 -1
- helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
- helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
- helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
- helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
- helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
- helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
- helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
- helm/benchmark/scenarios/gpqa_scenario.py +80 -0
- helm/benchmark/scenarios/grammar_scenario.py +2 -2
- helm/benchmark/scenarios/gsm_scenario.py +10 -1
- helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
- helm/benchmark/scenarios/headqa_scenario.py +131 -0
- helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
- helm/benchmark/scenarios/ice_scenario.py +8 -4
- helm/benchmark/scenarios/ifeval_scenario.py +53 -0
- helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
- helm/benchmark/scenarios/imdb_scenario.py +11 -2
- helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
- helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
- helm/benchmark/scenarios/koala_scenario.py +1 -1
- helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
- helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
- helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
- helm/benchmark/scenarios/legal_support_scenario.py +11 -1
- helm/benchmark/scenarios/legalbench_scenario.py +22 -3
- helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
- helm/benchmark/scenarios/lextreme_scenario.py +11 -1
- helm/benchmark/scenarios/live_qa_scenario.py +1 -1
- helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
- helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
- helm/benchmark/scenarios/math_scenario.py +9 -1
- helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
- helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
- helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
- helm/benchmark/scenarios/med_qa_scenario.py +10 -1
- helm/benchmark/scenarios/medalign_scenario.py +88 -0
- helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
- helm/benchmark/scenarios/medbullets_scenario.py +140 -0
- helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
- helm/benchmark/scenarios/medec_scenario.py +120 -0
- helm/benchmark/scenarios/medhallu_scenario.py +66 -0
- helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
- helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
- helm/benchmark/scenarios/mental_health_scenario.py +112 -0
- helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
- helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
- helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
- helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
- helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
- helm/benchmark/scenarios/mmlu_scenario.py +11 -1
- helm/benchmark/scenarios/msmarco_scenario.py +1 -1
- helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
- helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
- helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
- helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
- helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
- helm/benchmark/scenarios/newsqa_scenario.py +1 -1
- helm/benchmark/scenarios/numeracy_scenario.py +10 -1
- helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
- helm/benchmark/scenarios/omni_math_scenario.py +53 -0
- helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
- helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
- helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
- helm/benchmark/scenarios/quac_scenario.py +10 -1
- helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
- helm/benchmark/scenarios/raft_scenario.py +18 -3
- helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
- helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
- helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
- helm/benchmark/scenarios/scenario.py +9 -1
- helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +233 -84
- helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
- helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
- helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
- helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
- helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
- helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
- helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
- helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
- helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
- helm/benchmark/scenarios/spider_scenario.py +91 -0
- helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
- helm/benchmark/scenarios/summarization_scenario.py +11 -1
- helm/benchmark/scenarios/sumosum_scenario.py +157 -0
- helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
- helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
- helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
- helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
- helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
- helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
- helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
- helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
- helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
- helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
- helm/benchmark/scenarios/test_math_scenario.py +1 -0
- helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
- helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
- helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
- helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
- helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
- helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
- helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
- helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
- helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
- helm/benchmark/scenarios/unitxt_scenario.py +8 -2
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
- helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
- helm/benchmark/scenarios/wikifact_scenario.py +11 -1
- helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
- helm/benchmark/scenarios/wildbench_scenario.py +83 -0
- helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
- helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
- helm/benchmark/scenarios/xstest_scenario.py +1 -1
- helm/benchmark/server.py +11 -0
- helm/benchmark/slurm_runner.py +1 -1
- helm/benchmark/static/schema_audio.yaml +752 -0
- helm/benchmark/static/schema_autobencher.yaml +150 -0
- helm/benchmark/static/schema_call_center.yaml +97 -60
- helm/benchmark/static/schema_capabilities.yaml +254 -0
- helm/benchmark/static/schema_czech_bank.yaml +148 -0
- helm/benchmark/static/schema_enem_challenge.yaml +146 -0
- helm/benchmark/static/schema_enterprise.yaml +298 -0
- helm/benchmark/static/schema_finance.yaml +14 -12
- helm/benchmark/static/schema_heim.yaml +1389 -0
- helm/benchmark/static/schema_legal.yaml +566 -0
- helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
- helm/benchmark/static/schema_medhelm.yaml +1081 -0
- helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
- helm/benchmark/static/schema_safety.yaml +42 -6
- helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +40 -26
- helm/benchmark/static/schema_social_audio.yaml +224 -0
- helm/benchmark/static/schema_sql.yaml +171 -0
- helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +187 -30
- helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
- helm/benchmark/static/schema_vhelm.yaml +151 -47
- helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
- helm/benchmark/static_build/assets/index-262903c1.js +10 -0
- helm/benchmark/static_build/assets/index-42060d71.css +1 -0
- helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
- helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
- helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
- helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
- helm/benchmark/static_build/config.js +1 -1
- helm/benchmark/static_build/index.html +5 -5
- helm/benchmark/window_services/default_window_service.py +1 -1
- helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
- helm/benchmark/window_services/ice_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
- helm/benchmark/window_services/local_window_service.py +2 -2
- helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
- helm/benchmark/window_services/test_bloom_window_service.py +3 -3
- helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
- helm/benchmark/window_services/test_gptj_window_service.py +8 -3
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
- helm/benchmark/window_services/test_openai_window_service.py +8 -3
- helm/benchmark/window_services/test_opt_window_service.py +3 -3
- helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
- helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
- helm/benchmark/window_services/test_t511b_window_service.py +3 -3
- helm/benchmark/window_services/test_ul2_window_service.py +3 -3
- helm/benchmark/window_services/test_utils.py +1 -1
- helm/benchmark/window_services/test_yalm_window_service.py +3 -3
- helm/benchmark/window_services/tokenizer_service.py +0 -5
- helm/benchmark/window_services/yalm_window_service.py +1 -1
- helm/clients/ai21_client.py +3 -3
- helm/clients/aleph_alpha_client.py +1 -1
- helm/clients/audio_language/__init__.py +0 -0
- helm/clients/audio_language/diva_llama_client.py +118 -0
- helm/clients/audio_language/llama_omni_client.py +198 -0
- helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
- helm/clients/audio_language/qwen_audiolm_client.py +150 -0
- helm/clients/auto_client.py +4 -2
- helm/clients/azure_openai_client.py +55 -0
- helm/clients/bedrock_client.py +201 -7
- helm/clients/bedrock_utils.py +33 -0
- helm/clients/clip_scorers/clip_scorer.py +1 -1
- helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
- helm/clients/cohere_client.py +3 -3
- helm/clients/google_client.py +1 -1
- helm/clients/http_model_client.py +1 -1
- helm/clients/huggingface_client.py +10 -18
- helm/clients/ibm_client.py +267 -0
- helm/clients/image_generation/adobe_vision_client.py +1 -1
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
- helm/clients/image_generation/cogview2_client.py +1 -1
- helm/clients/image_generation/dalle2_client.py +1 -1
- helm/clients/image_generation/dalle3_client.py +2 -2
- helm/clients/image_generation/dalle_mini/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/data.py +1 -1
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
- helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
- helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
- helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
- helm/clients/image_generation/dalle_mini_client.py +1 -1
- helm/clients/image_generation/deep_floyd_client.py +1 -1
- helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
- helm/clients/image_generation/lexica_client.py +1 -1
- helm/clients/image_generation/mindalle/models/__init__.py +6 -6
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
- helm/clients/image_generation/mindalle_client.py +1 -1
- helm/clients/image_generation/together_image_generation_client.py +1 -1
- helm/clients/lit_gpt_client.py +2 -2
- helm/clients/mistral_client.py +62 -18
- helm/clients/nvidia_nim_client.py +0 -3
- helm/clients/openai_client.py +255 -21
- helm/clients/palmyra_client.py +2 -6
- helm/clients/reka_client.py +1 -1
- helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
- helm/clients/stanfordhealthcare_claude_client.py +31 -0
- helm/clients/stanfordhealthcare_google_client.py +43 -0
- helm/clients/stanfordhealthcare_http_model_client.py +93 -0
- helm/clients/stanfordhealthcare_openai_client.py +62 -0
- helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
- helm/clients/test_client.py +1 -1
- helm/clients/test_together_client.py +6 -1
- helm/clients/together_client.py +69 -7
- helm/clients/upstage_client.py +23 -0
- helm/clients/vertexai_client.py +39 -13
- helm/clients/vision_language/open_flamingo/__init__.py +2 -2
- helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
- helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
- helm/clients/vision_language/qwen2_vlm_client.py +175 -0
- helm/clients/vllm_client.py +4 -6
- helm/clients/yi_client.py +0 -3
- helm/common/audio_utils.py +111 -0
- helm/common/cache.py +8 -30
- helm/common/file_caches/local_file_cache.py +1 -1
- helm/common/file_caches/test_local_file_cache.py +1 -1
- helm/common/images_utils.py +2 -2
- helm/common/key_value_store.py +9 -9
- helm/common/media_object.py +2 -2
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +26 -0
- helm/common/reeval_parameters.py +12 -0
- helm/common/request.py +6 -2
- helm/common/response_format.py +18 -0
- helm/common/test_cache.py +1 -48
- helm/common/test_media_object.py +1 -1
- helm/common/tokenization_request.py +0 -9
- helm/config/model_deployments.yaml +1258 -33
- helm/config/model_metadata.yaml +1110 -41
- helm/config/tokenizer_configs.yaml +403 -3
- helm/proxy/cli.py +2 -2
- helm/proxy/example_queries.py +1 -1
- helm/proxy/server.py +11 -13
- helm/proxy/services/remote_service.py +1 -7
- helm/proxy/services/server_service.py +6 -19
- helm/proxy/services/service.py +0 -6
- helm/proxy/services/test_remote_service.py +2 -2
- helm/proxy/services/test_service.py +1 -1
- helm/proxy/static/general.js +122 -0
- helm/proxy/static/help.html +99 -0
- helm/proxy/static/index.css +57 -0
- helm/proxy/static/index.html +40 -0
- helm/proxy/static/index.js +456 -0
- helm/proxy/static/info-icon.png +0 -0
- helm/proxy/test_retry.py +1 -1
- helm/proxy/token_counters/auto_token_counter.py +1 -1
- helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
- helm/tokenizers/caching_tokenizer.py +2 -30
- helm/tokenizers/http_model_tokenizer.py +1 -1
- helm/tokenizers/huggingface_tokenizer.py +2 -2
- helm/tokenizers/lit_gpt_tokenizer.py +1 -1
- helm/tokenizers/test_anthropic_tokenizer.py +6 -2
- helm/tokenizers/test_huggingface_tokenizer.py +1 -1
- helm/tokenizers/test_yalm_tokenizer.py +1 -1
- helm/tokenizers/tiktoken_tokenizer.py +1 -1
- helm/tokenizers/tokenizer.py +3 -1
- helm/tokenizers/yalm_tokenizer.py +3 -3
- helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
- crfm_helm-0.5.3.dist-info/METADATA +0 -355
- crfm_helm-0.5.3.dist-info/RECORD +0 -699
- helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
- helm/benchmark/data_overlap/export_scenario_text.py +0 -119
- helm/benchmark/data_overlap/light_scenario.py +0 -60
- helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
- helm/benchmark/static_build/assets/index-58f97dcd.js +0 -10
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/tokenizers/anthropic_tokenizer.py +0 -52
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
- {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
- /helm/benchmark/{data_overlap → metrics/ifeval}/__init__.py +0 -0
helm/config/model_metadata.yaml
CHANGED
|
@@ -18,7 +18,7 @@ models:
|
|
|
18
18
|
access: open
|
|
19
19
|
release_date: 2023-01-01
|
|
20
20
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
# Adobe
|
|
23
23
|
- name: adobe/giga-gan
|
|
24
24
|
display_name: GigaGAN (1B)
|
|
@@ -128,7 +128,7 @@ models:
|
|
|
128
128
|
|
|
129
129
|
# AI Singapore
|
|
130
130
|
- name: aisingapore/sea-lion-7b
|
|
131
|
-
display_name: SEA-LION
|
|
131
|
+
display_name: SEA-LION 7B
|
|
132
132
|
description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
|
|
133
133
|
creator_organization_name: AI Singapore
|
|
134
134
|
access: open
|
|
@@ -137,7 +137,7 @@ models:
|
|
|
137
137
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
138
138
|
|
|
139
139
|
- name: aisingapore/sea-lion-7b-instruct
|
|
140
|
-
display_name: SEA-LION Instruct
|
|
140
|
+
display_name: SEA-LION 7B Instruct
|
|
141
141
|
description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
|
|
142
142
|
creator_organization_name: AI Singapore
|
|
143
143
|
access: open
|
|
@@ -145,6 +145,77 @@ models:
|
|
|
145
145
|
release_date: 2023-02-24
|
|
146
146
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
147
147
|
|
|
148
|
+
- name: aisingapore/llama3-8b-cpt-sea-lionv2-base
|
|
149
|
+
display_name: Llama3 8B CPT SEA-LIONv2
|
|
150
|
+
description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
|
|
151
|
+
creator_organization_name: AI Singapore
|
|
152
|
+
access: open
|
|
153
|
+
num_parameters: 8030000000
|
|
154
|
+
release_date: 2024-07-31
|
|
155
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
156
|
+
|
|
157
|
+
- name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
|
|
158
|
+
display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct
|
|
159
|
+
description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
|
|
160
|
+
creator_organization_name: AI Singapore
|
|
161
|
+
access: open
|
|
162
|
+
num_parameters: 8030000000
|
|
163
|
+
release_date: 2024-08-21
|
|
164
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
165
|
+
|
|
166
|
+
- name: aisingapore/gemma2-9b-cpt-sea-lionv3-base
|
|
167
|
+
display_name: Gemma2 9B CPT SEA-LIONv3
|
|
168
|
+
description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese.
|
|
169
|
+
creator_organization_name: AI Singapore
|
|
170
|
+
access: open
|
|
171
|
+
num_parameters: 9240000000
|
|
172
|
+
release_date: 2024-10-30
|
|
173
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
174
|
+
|
|
175
|
+
- name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct
|
|
176
|
+
display_name: Gemma2 9B CPT SEA-LIONv3 Instruct
|
|
177
|
+
description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese.
|
|
178
|
+
creator_organization_name: AI Singapore
|
|
179
|
+
access: open
|
|
180
|
+
num_parameters: 9240000000
|
|
181
|
+
release_date: 2024-10-30
|
|
182
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
183
|
+
|
|
184
|
+
- name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
|
|
185
|
+
display_name: Llama3.1 8B CPT SEA-LIONv3
|
|
186
|
+
description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
|
|
187
|
+
creator_organization_name: AI Singapore
|
|
188
|
+
access: open
|
|
189
|
+
num_parameters: 9240000000
|
|
190
|
+
release_date: 2024-12-11
|
|
191
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
192
|
+
|
|
193
|
+
- name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
|
|
194
|
+
display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct
|
|
195
|
+
description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese.
|
|
196
|
+
creator_organization_name: AI Singapore
|
|
197
|
+
access: open
|
|
198
|
+
num_parameters: 9240000000
|
|
199
|
+
release_date: 2024-12-11
|
|
200
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
201
|
+
|
|
202
|
+
- name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
|
|
203
|
+
display_name: Llama3.1 70B CPT SEA-LIONv3
|
|
204
|
+
description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
|
|
205
|
+
creator_organization_name: AI Singapore
|
|
206
|
+
access: open
|
|
207
|
+
num_parameters: 70600000000
|
|
208
|
+
release_date: 2024-12-11
|
|
209
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
210
|
+
|
|
211
|
+
- name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
|
|
212
|
+
display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct
|
|
213
|
+
description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese.
|
|
214
|
+
creator_organization_name: AI Singapore
|
|
215
|
+
access: open
|
|
216
|
+
num_parameters: 70600000000
|
|
217
|
+
release_date: 2024-12-11
|
|
218
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
148
219
|
|
|
149
220
|
# Aleph Alpha
|
|
150
221
|
# Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
|
|
@@ -202,7 +273,34 @@ models:
|
|
|
202
273
|
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
203
274
|
|
|
204
275
|
|
|
205
|
-
# Amazon
|
|
276
|
+
# Amazon Nova models
|
|
277
|
+
# References for Amazon Nova models:
|
|
278
|
+
# https://aws.amazon.com/ai/generative-ai/nova/
|
|
279
|
+
- name: amazon/nova-pro-v1:0
|
|
280
|
+
display_name: Amazon Nova Pro
|
|
281
|
+
description: Amazon Nova Pro Model
|
|
282
|
+
creator_organization_name: Amazon
|
|
283
|
+
access: limited
|
|
284
|
+
release_date: 2024-12-03
|
|
285
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
286
|
+
|
|
287
|
+
- name: amazon/nova-lite-v1:0
|
|
288
|
+
display_name: Amazon Nova Lite
|
|
289
|
+
description: Amazon Nova Lite Model
|
|
290
|
+
creator_organization_name: Amazon
|
|
291
|
+
access: limited
|
|
292
|
+
release_date: 2024-12-03
|
|
293
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
294
|
+
|
|
295
|
+
- name: amazon/nova-micro-v1:0
|
|
296
|
+
display_name: Amazon Nova Micro
|
|
297
|
+
description: Amazon Nova Micro Model
|
|
298
|
+
creator_organization_name: Amazon
|
|
299
|
+
access: limited
|
|
300
|
+
release_date: 2024-12-03
|
|
301
|
+
tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
302
|
+
|
|
303
|
+
# Titan Models
|
|
206
304
|
# References for Amazon Titan models:
|
|
207
305
|
# - https://aws.amazon.com/bedrock/titan/
|
|
208
306
|
# - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
|
|
@@ -213,16 +311,8 @@ models:
|
|
|
213
311
|
creator_organization_name: Amazon
|
|
214
312
|
access: limited
|
|
215
313
|
release_date: 2023-11-29
|
|
216
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
217
|
-
|
|
218
|
-
- name: amazon/titan-tg1-large
|
|
219
|
-
display_name: Amazon Titan Large
|
|
220
|
-
description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
|
|
221
|
-
creator_organization_name: Amazon
|
|
222
|
-
access: limited
|
|
223
|
-
release_date: 2023-11-29
|
|
224
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
225
|
-
|
|
314
|
+
tags: [BEDROCK_MODEL_TAG,TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
315
|
+
|
|
226
316
|
- name: amazon/titan-text-express-v1
|
|
227
317
|
display_name: Amazon Titan Text Express
|
|
228
318
|
description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
|
|
@@ -231,6 +321,93 @@ models:
|
|
|
231
321
|
release_date: 2023-11-29
|
|
232
322
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
233
323
|
|
|
324
|
+
# Mistral Models on Bedrock
|
|
325
|
+
# References for Mistral on Amazon Bedrock
|
|
326
|
+
# https://aws.amazon.com/bedrock/mistral/
|
|
327
|
+
|
|
328
|
+
- name: mistralai/amazon-mistral-7b-instruct-v0:2
|
|
329
|
+
display_name: Mistral 7B Instruct on Amazon Bedrock
|
|
330
|
+
description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
|
|
331
|
+
creator_organization_name: Mistral
|
|
332
|
+
access: limited
|
|
333
|
+
release_date: 2024-03-23
|
|
334
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
335
|
+
|
|
336
|
+
- name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
|
|
337
|
+
display_name: Mixtral 8x7B Instruct on Amazon Bedrock
|
|
338
|
+
description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
|
|
339
|
+
creator_organization_name: Mistral
|
|
340
|
+
access: limited
|
|
341
|
+
release_date: 2023-12-11
|
|
342
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
343
|
+
|
|
344
|
+
- name: mistralai/amazon-mistral-large-2402-v1:0
|
|
345
|
+
display_name: Mistral Large(2402) on Amazon Bedrock
|
|
346
|
+
description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
|
|
347
|
+
creator_organization_name: Mistral
|
|
348
|
+
access: limited
|
|
349
|
+
release_date: 2023-07-26
|
|
350
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
351
|
+
|
|
352
|
+
- name: mistralai/amazon-mistral-small-2402-v1:0
|
|
353
|
+
display_name: Mistral Small on Amazon Bedrock
|
|
354
|
+
description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
|
|
355
|
+
creator_organization_name: Mistral
|
|
356
|
+
access: limited
|
|
357
|
+
release_date: 2023-02-26
|
|
358
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
359
|
+
|
|
360
|
+
- name: mistralai/amazon-mistral-large-2407-v1:0
|
|
361
|
+
display_name: Mistral Large(2407) on Amazon Bedrock
|
|
362
|
+
description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling JSON outputting and reasoning capabilities.
|
|
363
|
+
creator_organization_name: Mistral
|
|
364
|
+
access: limited
|
|
365
|
+
release_date: 2024-07-24
|
|
366
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
367
|
+
|
|
368
|
+
# Llama3 on Amazon Bedrock
|
|
369
|
+
# References for Llama3 on Amazon Bedrock
|
|
370
|
+
# https://aws.amazon.com/bedrock/llama/
|
|
371
|
+
|
|
372
|
+
- name: meta/amazon-llama3-8b-instruct-v1:0
|
|
373
|
+
display_name: Llama 3 8B Instruct on Amazon Bedrock
|
|
374
|
+
description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
|
|
375
|
+
creator_organization_name: Meta
|
|
376
|
+
access: limited
|
|
377
|
+
release_date: 2024-04-23
|
|
378
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
379
|
+
|
|
380
|
+
- name: meta/amazon-llama3-70b-instruct-v1:0
|
|
381
|
+
display_name: Llama 3 70B Instruct on Amazon Bedrock
|
|
382
|
+
description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.
|
|
383
|
+
creator_organization_name: Meta
|
|
384
|
+
access: limited
|
|
385
|
+
release_date: 2024-04-23
|
|
386
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
387
|
+
|
|
388
|
+
- name: meta/amazon-llama3-1-405b-instruct-v1:0
|
|
389
|
+
display_name: Llama 3.1 405b Instruct on Amazon Bedrock.
|
|
390
|
+
description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
|
|
391
|
+
creator_organization_name: Meta
|
|
392
|
+
access: limited
|
|
393
|
+
release_date: 2024-07-26
|
|
394
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
395
|
+
|
|
396
|
+
- name: meta/amazon-llama3-1-70b-instruct-v1:0
|
|
397
|
+
display_name: Llama 3.1 70b Instruct on Amazon Bedrock.
|
|
398
|
+
description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
|
|
399
|
+
creator_organization_name: Meta
|
|
400
|
+
access: limited
|
|
401
|
+
release_date: 2024-07-26
|
|
402
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
403
|
+
|
|
404
|
+
- name: meta/amazon-llama3-1-8b-instruct-v1:0
|
|
405
|
+
display_name: Llama 3.1 8b Instruct on Amazon Bedrock.
|
|
406
|
+
description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
|
|
407
|
+
creator_organization_name: Meta
|
|
408
|
+
access: limited
|
|
409
|
+
release_date: 2024-07-26
|
|
410
|
+
tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
234
411
|
|
|
235
412
|
# Anthropic
|
|
236
413
|
- name: anthropic/claude-v1.3
|
|
@@ -298,6 +475,14 @@ models:
|
|
|
298
475
|
release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
|
|
299
476
|
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
300
477
|
|
|
478
|
+
- name: anthropic/claude-3-5-haiku-20241022
|
|
479
|
+
display_name: Claude 3.5 Haiku (20241022)
|
|
480
|
+
description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
|
|
481
|
+
creator_organization_name: Anthropic
|
|
482
|
+
access: limited
|
|
483
|
+
release_date: 2024-11-04 # Released after the blog post
|
|
484
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
485
|
+
|
|
301
486
|
- name: anthropic/claude-3-5-sonnet-20240620
|
|
302
487
|
display_name: Claude 3.5 Sonnet (20240620)
|
|
303
488
|
description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
|
|
@@ -306,6 +491,22 @@ models:
|
|
|
306
491
|
release_date: 2024-06-20
|
|
307
492
|
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
308
493
|
|
|
494
|
+
- name: anthropic/claude-3-5-sonnet-20241022
|
|
495
|
+
display_name: Claude 3.5 Sonnet (20241022)
|
|
496
|
+
description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
|
|
497
|
+
creator_organization_name: Anthropic
|
|
498
|
+
access: limited
|
|
499
|
+
release_date: 2024-10-22
|
|
500
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
501
|
+
|
|
502
|
+
- name: anthropic/claude-3-7-sonnet-20250219
|
|
503
|
+
display_name: Claude 3.7 Sonnet (20250219)
|
|
504
|
+
description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)).
|
|
505
|
+
creator_organization_name: Anthropic
|
|
506
|
+
access: limited
|
|
507
|
+
release_date: 2025-02-24
|
|
508
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
509
|
+
|
|
309
510
|
- name: anthropic/stanford-online-all-v4-s3
|
|
310
511
|
display_name: Anthropic-LM v4-s3 (52B)
|
|
311
512
|
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
@@ -325,7 +526,7 @@ models:
|
|
|
325
526
|
access: open
|
|
326
527
|
num_parameters: 13000000000
|
|
327
528
|
release_date: 2022-04-03
|
|
328
|
-
tags: [] # TODO: add tags
|
|
529
|
+
tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
|
|
329
530
|
|
|
330
531
|
|
|
331
532
|
|
|
@@ -346,7 +547,7 @@ models:
|
|
|
346
547
|
access: open
|
|
347
548
|
num_parameters: 176000000000
|
|
348
549
|
release_date: 2022-11-03
|
|
349
|
-
tags: [] # TODO: add tags
|
|
550
|
+
tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
|
|
350
551
|
|
|
351
552
|
- name: bigscience/t0pp
|
|
352
553
|
display_name: T0pp (11B)
|
|
@@ -401,7 +602,7 @@ models:
|
|
|
401
602
|
access: limited
|
|
402
603
|
num_parameters: 6700000000
|
|
403
604
|
release_date: 2023-04-06
|
|
404
|
-
tags: [] # TODO: add tags
|
|
605
|
+
tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
|
|
405
606
|
|
|
406
607
|
- name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
|
|
407
608
|
display_name: Cerebras GPT (13B)
|
|
@@ -410,7 +611,7 @@ models:
|
|
|
410
611
|
access: limited
|
|
411
612
|
num_parameters: 13000000000
|
|
412
613
|
release_date: 2023-04-06
|
|
413
|
-
tags: [] # TODO: add tags
|
|
614
|
+
tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
|
|
414
615
|
|
|
415
616
|
|
|
416
617
|
|
|
@@ -627,7 +828,7 @@ models:
|
|
|
627
828
|
access: closed
|
|
628
829
|
num_parameters: 280000000000
|
|
629
830
|
release_date: 2021-12-08
|
|
630
|
-
tags: []
|
|
831
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
631
832
|
|
|
632
833
|
- name: deepmind/chinchilla # NOT SUPPORTED
|
|
633
834
|
display_name: Chinchilla (70B)
|
|
@@ -636,7 +837,7 @@ models:
|
|
|
636
837
|
access: closed
|
|
637
838
|
num_parameters: 70000000000
|
|
638
839
|
release_date: 2022-03-31
|
|
639
|
-
tags: []
|
|
840
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
640
841
|
|
|
641
842
|
|
|
642
843
|
# Deepseek
|
|
@@ -649,7 +850,36 @@ models:
|
|
|
649
850
|
release_date: 2024-01-05
|
|
650
851
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
651
852
|
|
|
853
|
+
- name: deepseek-ai/deepseek-v3
|
|
854
|
+
display_name: DeepSeek v3
|
|
855
|
+
description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. It adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
|
|
856
|
+
creator_organization_name: DeepSeek
|
|
857
|
+
access: open
|
|
858
|
+
# NOTE: The total size of DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights.
|
|
859
|
+
num_parameters: 685000000000
|
|
860
|
+
release_date: 2024-12-24
|
|
861
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
862
|
+
|
|
863
|
+
- name: deepseek-ai/deepseek-r1
|
|
864
|
+
display_name: DeepSeek R1
|
|
865
|
+
description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948))
|
|
866
|
+
creator_organization_name: DeepSeek
|
|
867
|
+
access: open
|
|
868
|
+
# NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
|
|
869
|
+
num_parameters: 685000000000
|
|
870
|
+
release_date: 2025-01-20
|
|
871
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
652
872
|
|
|
873
|
+
- name: deepseek-ai/deepseek-r1-hide-reasoning
|
|
874
|
+
display_name: DeepSeek R1 (hide reasoning)
|
|
875
|
+
description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model.
|
|
876
|
+
creator_organization_name: DeepSeek
|
|
877
|
+
access: open
|
|
878
|
+
# NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
|
|
879
|
+
num_parameters: 685000000000
|
|
880
|
+
release_date: 2025-01-20
|
|
881
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
882
|
+
|
|
653
883
|
# EleutherAI
|
|
654
884
|
- name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
|
|
655
885
|
display_name: GPT-J (6B)
|
|
@@ -752,7 +982,7 @@ models:
|
|
|
752
982
|
access: closed
|
|
753
983
|
num_parameters: 540000000000
|
|
754
984
|
release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
|
|
755
|
-
tags: []
|
|
985
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
756
986
|
|
|
757
987
|
# Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
|
|
758
988
|
- name: google/gemini-pro
|
|
@@ -802,7 +1032,7 @@ models:
|
|
|
802
1032
|
creator_organization_name: Google
|
|
803
1033
|
access: limited
|
|
804
1034
|
release_date: 2024-05-24
|
|
805
|
-
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1035
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
806
1036
|
|
|
807
1037
|
- name: google/gemini-1.5-flash-001
|
|
808
1038
|
display_name: Gemini 1.5 Flash (001)
|
|
@@ -810,7 +1040,7 @@ models:
|
|
|
810
1040
|
creator_organization_name: Google
|
|
811
1041
|
access: limited
|
|
812
1042
|
release_date: 2024-05-24
|
|
813
|
-
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1043
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
814
1044
|
|
|
815
1045
|
- name: google/gemini-1.5-pro-preview-0409
|
|
816
1046
|
display_name: Gemini 1.5 Pro (0409 preview)
|
|
@@ -868,6 +1098,70 @@ models:
|
|
|
868
1098
|
release_date: 2024-05-24
|
|
869
1099
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
870
1100
|
|
|
1101
|
+
- name: google/gemini-1.5-pro-002
|
|
1102
|
+
display_name: Gemini 1.5 Pro (002)
|
|
1103
|
+
description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
|
|
1104
|
+
creator_organization_name: Google
|
|
1105
|
+
access: limited
|
|
1106
|
+
release_date: 2024-09-24
|
|
1107
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1108
|
+
|
|
1109
|
+
- name: google/gemini-1.5-flash-002
|
|
1110
|
+
display_name: Gemini 1.5 Flash (002)
|
|
1111
|
+
description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
|
|
1112
|
+
creator_organization_name: Google
|
|
1113
|
+
access: limited
|
|
1114
|
+
release_date: 2024-09-24
|
|
1115
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1116
|
+
|
|
1117
|
+
- name: google/gemini-2.0-flash-exp
|
|
1118
|
+
display_name: Gemini 2.0 Flash (Experimental)
|
|
1119
|
+
description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash))
|
|
1120
|
+
creator_organization_name: Google
|
|
1121
|
+
access: limited
|
|
1122
|
+
release_date: 2024-12-11
|
|
1123
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1124
|
+
|
|
1125
|
+
- name: google/gemini-1.5-flash-8b-001
|
|
1126
|
+
display_name: Gemini 1.5 Flash 8B
|
|
1127
|
+
description: Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks. ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1128
|
+
creator_organization_name: Google
|
|
1129
|
+
access: limited
|
|
1130
|
+
release_date: 2024-10-01
|
|
1131
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1132
|
+
|
|
1133
|
+
- name: google/gemini-2.0-flash-001
|
|
1134
|
+
display_name: Gemini 2.0 Flash
|
|
1135
|
+
description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1136
|
+
creator_organization_name: Google
|
|
1137
|
+
access: limited
|
|
1138
|
+
release_date: 2025-02-01
|
|
1139
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1140
|
+
|
|
1141
|
+
- name: google/gemini-2.0-flash-lite-preview-02-05
|
|
1142
|
+
display_name: Gemini 2.0 Flash Lite (02-05 preview)
|
|
1143
|
+
description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1144
|
+
creator_organization_name: Google
|
|
1145
|
+
access: limited
|
|
1146
|
+
release_date: 2025-02-05
|
|
1147
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1148
|
+
|
|
1149
|
+
- name: google/gemini-2.0-flash-thinking-exp-01-21
|
|
1150
|
+
display_name: Gemini 2.0 Flash Thinking (01-21 preview)
|
|
1151
|
+
description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
|
|
1152
|
+
creator_organization_name: Google
|
|
1153
|
+
access: limited
|
|
1154
|
+
release_date: 2025-01-21
|
|
1155
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1156
|
+
|
|
1157
|
+
- name: google/gemini-2.0-pro-exp-02-05
|
|
1158
|
+
display_name: Gemini 2.0 Pro (02-05 preview)
|
|
1159
|
+
description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
|
|
1160
|
+
creator_organization_name: Google
|
|
1161
|
+
access: limited
|
|
1162
|
+
release_date: 2025-02-05
|
|
1163
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1164
|
+
|
|
871
1165
|
- name: google/gemma-2b
|
|
872
1166
|
display_name: Gemma (2B)
|
|
873
1167
|
description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
|
|
@@ -1287,7 +1581,7 @@ models:
|
|
|
1287
1581
|
access: open
|
|
1288
1582
|
num_parameters: 175000000000
|
|
1289
1583
|
release_date: 2022-12-22
|
|
1290
|
-
tags: []
|
|
1584
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
1291
1585
|
|
|
1292
1586
|
- name: meta/opt-iml-30b # NOT SUPPORTED
|
|
1293
1587
|
display_name: OPT-IML (30B)
|
|
@@ -1296,7 +1590,7 @@ models:
|
|
|
1296
1590
|
access: open
|
|
1297
1591
|
num_parameters: 30000000000
|
|
1298
1592
|
release_date: 2022-12-22
|
|
1299
|
-
tags: []
|
|
1593
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
1300
1594
|
|
|
1301
1595
|
- name: meta/opt-175b
|
|
1302
1596
|
display_name: OPT (175B)
|
|
@@ -1343,7 +1637,7 @@ models:
|
|
|
1343
1637
|
access: open
|
|
1344
1638
|
num_parameters: 120000000000
|
|
1345
1639
|
release_date: 2022-11-15
|
|
1346
|
-
tags: []
|
|
1640
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
1347
1641
|
|
|
1348
1642
|
- name: meta/galactica-30b # NOT SUPPORTED
|
|
1349
1643
|
display_name: Galactica (30B)
|
|
@@ -1352,7 +1646,7 @@ models:
|
|
|
1352
1646
|
access: open
|
|
1353
1647
|
num_parameters: 30000000000
|
|
1354
1648
|
release_date: 2022-11-15
|
|
1355
|
-
tags: []
|
|
1649
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
1356
1650
|
|
|
1357
1651
|
- name: meta/llama-7b
|
|
1358
1652
|
display_name: LLaMA (7B)
|
|
@@ -1427,6 +1721,24 @@ models:
|
|
|
1427
1721
|
num_parameters: 8000000000
|
|
1428
1722
|
release_date: 2024-04-18
|
|
1429
1723
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1724
|
+
|
|
1725
|
+
- name: meta/llama-3-8b-instruct-turbo
|
|
1726
|
+
display_name: Llama 3 Instruct Turbo (8B)
|
|
1727
|
+
description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
|
|
1728
|
+
creator_organization_name: Meta
|
|
1729
|
+
access: open
|
|
1730
|
+
num_parameters: 8000000000
|
|
1731
|
+
release_date: 2024-07-18
|
|
1732
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1733
|
+
|
|
1734
|
+
- name: meta/llama-3-8b-instruct-lite
|
|
1735
|
+
display_name: Llama 3 Instruct Lite (8B)
|
|
1736
|
+
description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation, it leverages a number of optimizations including INT4 quantization, provides the most cost-efficient and scalable Llama 3 models available anywhere, while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
|
|
1737
|
+
creator_organization_name: Meta
|
|
1738
|
+
access: open
|
|
1739
|
+
num_parameters: 8000000000
|
|
1740
|
+
release_date: 2024-07-18
|
|
1741
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1430
1742
|
|
|
1431
1743
|
- name: meta/llama-3-70b
|
|
1432
1744
|
display_name: Llama 3 (70B)
|
|
@@ -1436,6 +1748,51 @@ models:
|
|
|
1436
1748
|
num_parameters: 70000000000
|
|
1437
1749
|
release_date: 2024-04-18
|
|
1438
1750
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1751
|
+
|
|
1752
|
+
- name: meta/llama-3-70b-instruct-turbo
|
|
1753
|
+
display_name: Llama 3 Instruct Turbo (70B)
|
|
1754
|
+
description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
|
|
1755
|
+
creator_organization_name: Meta
|
|
1756
|
+
access: open
|
|
1757
|
+
num_parameters: 70000000000
|
|
1758
|
+
release_date: 2024-07-18
|
|
1759
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1760
|
+
|
|
1761
|
+
- name: meta/llama-3-70b-instruct-lite
|
|
1762
|
+
display_name: Llama 3 Instruct Lite (70B)
|
|
1763
|
+
description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation, it leverages a number of optimizations including INT4 quantization, provides the most cost-efficient and scalable Llama 3 models available anywhere, while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
|
|
1764
|
+
creator_organization_name: Meta
|
|
1765
|
+
access: open
|
|
1766
|
+
num_parameters: 70000000000
|
|
1767
|
+
release_date: 2024-07-18
|
|
1768
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1769
|
+
|
|
1770
|
+
- name: meta/llama-3.1-8b-instruct
|
|
1771
|
+
display_name: Llama 3.1 Instruct (8B)
|
|
1772
|
+
description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
|
|
1773
|
+
creator_organization_name: Meta
|
|
1774
|
+
access: open
|
|
1775
|
+
num_parameters: 8000000000
|
|
1776
|
+
release_date: 2024-07-23
|
|
1777
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1778
|
+
|
|
1779
|
+
- name: meta/llama-3.1-70b-instruct
|
|
1780
|
+
display_name: Llama 3.1 Instruct (70B)
|
|
1781
|
+
description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
|
|
1782
|
+
creator_organization_name: Meta
|
|
1783
|
+
access: open
|
|
1784
|
+
num_parameters: 70000000000
|
|
1785
|
+
release_date: 2024-07-23
|
|
1786
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1787
|
+
|
|
1788
|
+
- name: meta/llama-3.1-405b-instruct
|
|
1789
|
+
display_name: Llama 3.1 Instruct (405B)
|
|
1790
|
+
description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
|
|
1791
|
+
creator_organization_name: Meta
|
|
1792
|
+
access: open
|
|
1793
|
+
num_parameters: 405000000000
|
|
1794
|
+
release_date: 2024-07-23
|
|
1795
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1439
1796
|
|
|
1440
1797
|
- name: meta/llama-3.1-8b-instruct-turbo
|
|
1441
1798
|
display_name: Llama 3.1 Instruct Turbo (8B)
|
|
@@ -1444,7 +1801,7 @@ models:
|
|
|
1444
1801
|
access: open
|
|
1445
1802
|
num_parameters: 8000000000
|
|
1446
1803
|
release_date: 2024-07-23
|
|
1447
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1804
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1448
1805
|
|
|
1449
1806
|
- name: meta/llama-3.1-70b-instruct-turbo
|
|
1450
1807
|
display_name: Llama 3.1 Instruct Turbo (70B)
|
|
@@ -1453,7 +1810,7 @@ models:
|
|
|
1453
1810
|
access: open
|
|
1454
1811
|
num_parameters: 70000000000
|
|
1455
1812
|
release_date: 2024-07-23
|
|
1456
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1813
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1457
1814
|
|
|
1458
1815
|
- name: meta/llama-3.1-405b-instruct-turbo
|
|
1459
1816
|
display_name: Llama 3.1 Instruct Turbo (405B)
|
|
@@ -1462,7 +1819,61 @@ models:
|
|
|
1462
1819
|
access: open
|
|
1463
1820
|
num_parameters: 405000000000
|
|
1464
1821
|
release_date: 2024-07-23
|
|
1465
|
-
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1822
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1823
|
+
|
|
1824
|
+
- name: meta/llama-3.2-1b-instruct
|
|
1825
|
+
display_name: Llama 3.2 Instruct (1.23B)
|
|
1826
|
+
description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/))
|
|
1827
|
+
creator_organization_name: Meta
|
|
1828
|
+
access: open
|
|
1829
|
+
num_parameters: 1230000000
|
|
1830
|
+
release_date: 2024-09-25
|
|
1831
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1832
|
+
|
|
1833
|
+
- name: meta/llama-3.2-3b-instruct-turbo
|
|
1834
|
+
display_name: Llama 3.2 Instruct Turbo (3B)
|
|
1835
|
+
description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
|
|
1836
|
+
creator_organization_name: Meta
|
|
1837
|
+
access: open
|
|
1838
|
+
num_parameters: 3210000000
|
|
1839
|
+
release_date: 2024-09-25
|
|
1840
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1841
|
+
|
|
1842
|
+
- name: meta/llama-3.2-11b-vision-instruct-turbo
|
|
1843
|
+
display_name: Llama 3.2 Vision Instruct Turbo (11B)
|
|
1844
|
+
description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
|
|
1845
|
+
creator_organization_name: Meta
|
|
1846
|
+
access: open
|
|
1847
|
+
num_parameters: 10700000000
|
|
1848
|
+
release_date: 2024-09-25
|
|
1849
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1850
|
+
|
|
1851
|
+
- name: meta/llama-3.2-90b-vision-instruct-turbo
|
|
1852
|
+
display_name: Llama 3.2 Vision Instruct Turbo (90B)
|
|
1853
|
+
description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
|
|
1854
|
+
creator_organization_name: Meta
|
|
1855
|
+
access: open
|
|
1856
|
+
num_parameters: 88600000000
|
|
1857
|
+
release_date: 2024-09-25
|
|
1858
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1859
|
+
|
|
1860
|
+
- name: meta/llama-3.3-70b-instruct-turbo
|
|
1861
|
+
display_name: Llama 3.3 Instruct Turbo (70B)
|
|
1862
|
+
description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
|
|
1863
|
+
creator_organization_name: Meta
|
|
1864
|
+
access: open
|
|
1865
|
+
num_parameters: 70000000000
|
|
1866
|
+
release_date: 2024-12-06
|
|
1867
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1868
|
+
|
|
1869
|
+
- name: meta/llama-3.3-70b-instruct
|
|
1870
|
+
display_name: Llama 3.3 Instruct (70B)
|
|
1871
|
+
description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
|
|
1872
|
+
creator_organization_name: Meta
|
|
1873
|
+
access: open
|
|
1874
|
+
num_parameters: 70000000000
|
|
1875
|
+
release_date: 2024-12-06
|
|
1876
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1466
1877
|
|
|
1467
1878
|
- name: meta/llama-3-8b-chat
|
|
1468
1879
|
display_name: Llama 3 Instruct (8B)
|
|
@@ -1510,9 +1921,6 @@ models:
|
|
|
1510
1921
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1511
1922
|
|
|
1512
1923
|
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
1924
|
# Microsoft/NVIDIA
|
|
1517
1925
|
- name: microsoft/TNLGv2_530B
|
|
1518
1926
|
display_name: TNLG v2 (530B)
|
|
@@ -1621,6 +2029,24 @@ models:
|
|
|
1621
2029
|
num_parameters: 14000000000
|
|
1622
2030
|
release_date: 2024-05-21
|
|
1623
2031
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2032
|
+
|
|
2033
|
+
- name: microsoft/phi-3.5-mini-instruct
|
|
2034
|
+
display_name: Phi-3.5-mini-instruct (3.8B)
|
|
2035
|
+
description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
|
|
2036
|
+
creator_organization_name: Microsoft
|
|
2037
|
+
access: open
|
|
2038
|
+
num_parameters: 3800000000
|
|
2039
|
+
release_date: 2024-08-22
|
|
2040
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2041
|
+
|
|
2042
|
+
- name: microsoft/phi-3.5-moe-instruct
|
|
2043
|
+
display_name: Phi-3.5 MoE
|
|
2044
|
+
description: Phi-3.5 MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents - with a focus on very high-quality, reasoning dense data. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
|
|
2045
|
+
creator_organization_name: Microsoft
|
|
2046
|
+
access: open
|
|
2047
|
+
num_parameters: 41900000000
|
|
2048
|
+
release_date: 2024-08-22
|
|
2049
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1624
2050
|
|
|
1625
2051
|
# KAIST AI
|
|
1626
2052
|
- name: kaistai/prometheus-vision-13b-v1.0-hf
|
|
@@ -1760,6 +2186,15 @@ models:
|
|
|
1760
2186
|
num_parameters: 7300000000
|
|
1761
2187
|
release_date: 2024-05-22
|
|
1762
2188
|
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2189
|
+
|
|
2190
|
+
- name: mistralai/mistral-7b-instruct-v0.3-hf
|
|
2191
|
+
display_name: Mistral Instruct v0.3 (7B)
|
|
2192
|
+
description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
|
|
2193
|
+
creator_organization_name: Mistral AI
|
|
2194
|
+
access: open
|
|
2195
|
+
num_parameters: 7300000000
|
|
2196
|
+
release_date: 2024-05-22
|
|
2197
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1763
2198
|
|
|
1764
2199
|
- name: mistralai/mixtral-8x7b-32kseqlen
|
|
1765
2200
|
display_name: Mixtral (8x7B 32K seqlen)
|
|
@@ -1807,6 +2242,22 @@ models:
|
|
|
1807
2242
|
release_date: 2023-10-16
|
|
1808
2243
|
tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
|
|
1809
2244
|
|
|
2245
|
+
- name: mistralai/ministral-3b-2410
|
|
2246
|
+
display_name: Ministral 3B (2410)
|
|
2247
|
+
description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)).
|
|
2248
|
+
creator_organization_name: Mistral AI
|
|
2249
|
+
access: limited
|
|
2250
|
+
release_date: 2024-10-16
|
|
2251
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2252
|
+
|
|
2253
|
+
- name: mistralai/ministral-8b-2410
|
|
2254
|
+
display_name: Ministral 8B (2410)
|
|
2255
|
+
description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases, with a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)).
|
|
2256
|
+
creator_organization_name: Mistral AI
|
|
2257
|
+
access: open
|
|
2258
|
+
release_date: 2024-10-16
|
|
2259
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2260
|
+
|
|
1810
2261
|
- name: mistralai/mistral-small-2402
|
|
1811
2262
|
display_name: Mistral Small (2402)
|
|
1812
2263
|
description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
@@ -1815,6 +2266,32 @@ models:
|
|
|
1815
2266
|
release_date: 2023-02-26
|
|
1816
2267
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1817
2268
|
|
|
2269
|
+
- name: mistralai/mistral-small-2409
|
|
2270
|
+
display_name: Mistral Small (2409)
|
|
2271
|
+
description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
|
|
2272
|
+
creator_organization_name: Mistral AI
|
|
2273
|
+
access: limited
|
|
2274
|
+
release_date: 2024-09-18
|
|
2275
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2276
|
+
|
|
2277
|
+
- name: mistralai/mistral-small-2501
|
|
2278
|
+
display_name: Mistral Small 3 (2501)
|
|
2279
|
+
description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks—those that require robust language and instruction following performance, with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/))
|
|
2280
|
+
creator_organization_name: Mistral AI
|
|
2281
|
+
access: open
|
|
2282
|
+
num_parameters: 23600000000
|
|
2283
|
+
release_date: 2025-01-30
|
|
2284
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2285
|
+
|
|
2286
|
+
- name: mistralai/mistral-small-2503
|
|
2287
|
+
display_name: Mistral Small 3.1 (2503)
|
|
2288
|
+
description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1))
|
|
2289
|
+
creator_organization_name: Mistral AI
|
|
2290
|
+
access: open
|
|
2291
|
+
num_parameters: 23600000000
|
|
2292
|
+
release_date: 2025-03-17
|
|
2293
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2294
|
+
|
|
1818
2295
|
- name: mistralai/mistral-medium-2312
|
|
1819
2296
|
display_name: Mistral Medium (2312)
|
|
1820
2297
|
description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
|
|
@@ -1840,6 +2317,15 @@ models:
|
|
|
1840
2317
|
release_date: 2023-07-24
|
|
1841
2318
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1842
2319
|
|
|
2320
|
+
- name: mistralai/mistral-large-2411
|
|
2321
|
+
display_name: Mistral Large (2411)
|
|
2322
|
+
description: Mistral Large (2411) is a 123B parameter model that has a 128k context window. ([blog](https://mistral.ai/news/pixtral-large/))
|
|
2323
|
+
creator_organization_name: Mistral AI
|
|
2324
|
+
access: open
|
|
2325
|
+
num_parameters: 123000000000
|
|
2326
|
+
release_date: 2024-11-18
|
|
2327
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2328
|
+
|
|
1843
2329
|
- name: mistralai/open-mistral-nemo-2407
|
|
1844
2330
|
display_name: Mistral NeMo (2407)
|
|
1845
2331
|
description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
|
|
@@ -1848,6 +2334,24 @@ models:
|
|
|
1848
2334
|
release_date: 2024-07-18
|
|
1849
2335
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1850
2336
|
|
|
2337
|
+
- name: mistralai/pixtral-12b-2409
|
|
2338
|
+
display_name: Mistral Pixtral (2409)
|
|
2339
|
+
description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/))
|
|
2340
|
+
creator_organization_name: Mistral AI
|
|
2341
|
+
access: open
|
|
2342
|
+
release_date: 2024-09-17
|
|
2343
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2344
|
+
|
|
2345
|
+
- name: mistralai/pixtral-large-2411
|
|
2346
|
+
display_name: Mistral Pixtral Large (2411)
|
|
2347
|
+
description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/))
|
|
2348
|
+
creator_organization_name: Mistral AI
|
|
2349
|
+
access: open
|
|
2350
|
+
num_parameters: 124000000000
|
|
2351
|
+
release_date: 2024-11-18
|
|
2352
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2353
|
+
|
|
2354
|
+
|
|
1851
2355
|
# MosaicML
|
|
1852
2356
|
- name: mosaicml/mpt-7b
|
|
1853
2357
|
display_name: MPT (7B)
|
|
@@ -1865,7 +2369,7 @@ models:
|
|
|
1865
2369
|
access: open
|
|
1866
2370
|
num_parameters: 6700000000
|
|
1867
2371
|
release_date: 2023-05-05
|
|
1868
|
-
tags: []
|
|
2372
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
1869
2373
|
|
|
1870
2374
|
- name: mosaicml/mpt-instruct-7b
|
|
1871
2375
|
display_name: MPT-Instruct (7B)
|
|
@@ -1892,7 +2396,7 @@ models:
|
|
|
1892
2396
|
access: open
|
|
1893
2397
|
num_parameters: 30000000000
|
|
1894
2398
|
release_date: 2023-06-22
|
|
1895
|
-
tags: []
|
|
2399
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
1896
2400
|
|
|
1897
2401
|
- name: mosaicml/mpt-instruct-30b
|
|
1898
2402
|
display_name: MPT-Instruct (30B)
|
|
@@ -1904,6 +2408,27 @@ models:
|
|
|
1904
2408
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1905
2409
|
|
|
1906
2410
|
|
|
2411
|
+
|
|
2412
|
+
# NECTEC
|
|
2413
|
+
- name: nectec/Pathumma-llm-text-1.0.0
|
|
2414
|
+
display_name: Pathumma-llm-text-1.0.0 (7B)
|
|
2415
|
+
description: Pathumma-llm-text-1.0.0 (7B) is an instruction model from OpenThaiLLM-Prebuilt-7B ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
|
|
2416
|
+
creator_organization_name: nectec
|
|
2417
|
+
access: open
|
|
2418
|
+
num_parameters: 7620000000
|
|
2419
|
+
release_date: 2024-10-28
|
|
2420
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2421
|
+
|
|
2422
|
+
- name: nectec/OpenThaiLLM-Prebuilt-7B
|
|
2423
|
+
display_name: OpenThaiLLM-Prebuilt-7B (7B)
|
|
2424
|
+
description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
|
|
2425
|
+
creator_organization_name: nectec
|
|
2426
|
+
access: open
|
|
2427
|
+
num_parameters: 7620000000
|
|
2428
|
+
release_date: 2024-10-28
|
|
2429
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2430
|
+
|
|
2431
|
+
|
|
1907
2432
|
|
|
1908
2433
|
# Neurips
|
|
1909
2434
|
- name: neurips/local
|
|
@@ -1933,6 +2458,16 @@ models:
|
|
|
1933
2458
|
release_date: 2024-06-17
|
|
1934
2459
|
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1935
2460
|
|
|
2461
|
+
- name: nvidia/llama-3.1-nemotron-70b-instruct
|
|
2462
|
+
display_name: Llama 3.1 Nemotron Instruct (70B)
|
|
2463
|
+
description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
|
|
2464
|
+
creator_organization_name: NVIDIA
|
|
2465
|
+
access: open
|
|
2466
|
+
num_parameters: 70000000000
|
|
2467
|
+
release_date: 2024-10-02
|
|
2468
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2469
|
+
|
|
2470
|
+
|
|
1936
2471
|
# OpenAI
|
|
1937
2472
|
|
|
1938
2473
|
## GPT 2 Models
|
|
@@ -2117,7 +2652,7 @@ models:
|
|
|
2117
2652
|
tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2118
2653
|
|
|
2119
2654
|
|
|
2120
|
-
## GPT 4
|
|
2655
|
+
## GPT-4 and GPT-4 Turbo
|
|
2121
2656
|
|
|
2122
2657
|
- name: openai/gpt-4-1106-preview
|
|
2123
2658
|
display_name: GPT-4 Turbo (1106 preview)
|
|
@@ -2169,6 +2704,8 @@ models:
|
|
|
2169
2704
|
release_date: 2024-01-25
|
|
2170
2705
|
tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2171
2706
|
|
|
2707
|
+
## GPT-4o
|
|
2708
|
+
|
|
2172
2709
|
- name: openai/gpt-4-turbo-2024-04-09
|
|
2173
2710
|
display_name: GPT-4 Turbo (2024-04-09)
|
|
2174
2711
|
description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
|
|
@@ -2193,6 +2730,14 @@ models:
|
|
|
2193
2730
|
release_date: 2024-08-06
|
|
2194
2731
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2195
2732
|
|
|
2733
|
+
- name: openai/gpt-4o-2024-11-20
|
|
2734
|
+
display_name: GPT-4o (2024-11-20)
|
|
2735
|
+
description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
|
|
2736
|
+
creator_organization_name: OpenAI
|
|
2737
|
+
access: limited
|
|
2738
|
+
release_date: 2024-11-20
|
|
2739
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2740
|
+
|
|
2196
2741
|
- name: openai/gpt-4o-mini-2024-07-18
|
|
2197
2742
|
display_name: GPT-4o mini (2024-07-18)
|
|
2198
2743
|
description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
|
|
@@ -2201,6 +2746,40 @@ models:
|
|
|
2201
2746
|
release_date: 2024-07-18
|
|
2202
2747
|
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2203
2748
|
|
|
2749
|
+
- name: openai/whisper-1_gpt-4o-2024-11-20
|
|
2750
|
+
display_name: Whisper-1 + GPT-4o (2024-11-20)
|
|
2751
|
+
description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
|
|
2752
|
+
creator_organization_name: OpenAI
|
|
2753
|
+
access: limited
|
|
2754
|
+
release_date: 2024-11-20
|
|
2755
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
|
|
2756
|
+
|
|
2757
|
+
- name: openai/gpt-4o-audio-preview-2024-10-01
|
|
2758
|
+
display_name: GPT-4o Audio (Preview 2024-10-01)
|
|
2759
|
+
description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
|
|
2760
|
+
creator_organization_name: OpenAI
|
|
2761
|
+
access: limited
|
|
2762
|
+
release_date: 2024-10-01
|
|
2763
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2764
|
+
|
|
2765
|
+
- name: openai/gpt-4o-audio-preview-2024-12-17
|
|
2766
|
+
display_name: GPT-4o Audio (Preview 2024-12-17)
|
|
2767
|
+
description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
|
|
2768
|
+
creator_organization_name: OpenAI
|
|
2769
|
+
access: limited
|
|
2770
|
+
release_date: 2024-12-17
|
|
2771
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2772
|
+
|
|
2773
|
+
- name: openai/gpt-4o-mini-audio-preview-2024-12-17
|
|
2774
|
+
display_name: GPT-4o mini Audio (Preview 2024-12-17)
|
|
2775
|
+
description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
|
|
2776
|
+
creator_organization_name: OpenAI
|
|
2777
|
+
access: limited
|
|
2778
|
+
release_date: 2024-12-17
|
|
2779
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2780
|
+
|
|
2781
|
+
# GPT-4V
|
|
2782
|
+
|
|
2204
2783
|
- name: openai/gpt-4-vision-preview
|
|
2205
2784
|
# According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
|
|
2206
2785
|
display_name: GPT-4V (1106 preview)
|
|
@@ -2218,6 +2797,80 @@ models:
|
|
|
2218
2797
|
release_date: 2023-11-06
|
|
2219
2798
|
tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
2220
2799
|
|
|
2800
|
+
## GPT-4.5
|
|
2801
|
+
- name: openai/gpt-4.5-preview-2025-02-27
|
|
2802
|
+
display_name: GPT-4.5 (2025-02-27 preview)
|
|
2803
|
+
description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). ([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
|
|
2804
|
+
creator_organization_name: OpenAI
|
|
2805
|
+
access: limited
|
|
2806
|
+
release_date: 2025-02-27
|
|
2807
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2808
|
+
|
|
2809
|
+
## o1 Models
|
|
2810
|
+
- name: openai/o1-2024-12-17
|
|
2811
|
+
display_name: o1 (2024-12-17)
|
|
2812
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
|
|
2813
|
+
creator_organization_name: OpenAI
|
|
2814
|
+
access: limited
|
|
2815
|
+
release_date: 2024-12-17
|
|
2816
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2817
|
+
|
|
2818
|
+
- name: openai/o1-2024-12-17-low-reasoning-effort
|
|
2819
|
+
display_name: o1 (2024-12-17, low reasoning effort)
|
|
2820
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
|
|
2821
|
+
creator_organization_name: OpenAI
|
|
2822
|
+
access: limited
|
|
2823
|
+
release_date: 2024-12-17
|
|
2824
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2825
|
+
|
|
2826
|
+
- name: openai/o1-2024-12-17-high-reasoning-effort
|
|
2827
|
+
display_name: o1 (2024-12-17, high reasoning effort)
|
|
2828
|
+
description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
|
|
2829
|
+
creator_organization_name: OpenAI
|
|
2830
|
+
access: limited
|
|
2831
|
+
release_date: 2024-12-17
|
|
2832
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2833
|
+
|
|
2834
|
+
- name: openai/o1-preview-2024-09-12
|
|
2835
|
+
display_name: o1-preview (2024-09-12)
|
|
2836
|
+
description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
|
|
2837
|
+
creator_organization_name: OpenAI
|
|
2838
|
+
access: limited
|
|
2839
|
+
release_date: 2024-09-12
|
|
2840
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2841
|
+
|
|
2842
|
+
- name: openai/o1-mini-2024-09-12
|
|
2843
|
+
display_name: o1-mini (2024-09-12)
|
|
2844
|
+
description: o1-mini is a cost-effective reasoning model for applications that require reasoning without broad world knowledge. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/))
|
|
2845
|
+
creator_organization_name: OpenAI
|
|
2846
|
+
access: limited
|
|
2847
|
+
release_date: 2024-09-12
|
|
2848
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2849
|
+
|
|
2850
|
+
- name: openai/o3-mini-2025-01-31
|
|
2851
|
+
display_name: o3-mini (2025-01-31)
|
|
2852
|
+
description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
|
|
2853
|
+
creator_organization_name: OpenAI
|
|
2854
|
+
access: limited
|
|
2855
|
+
release_date: 2025-01-31
|
|
2856
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2857
|
+
|
|
2858
|
+
- name: openai/o3-mini-2025-01-31-low-reasoning-effort
|
|
2859
|
+
display_name: o3-mini (2025-01-31, low reasoning effort)
|
|
2860
|
+
description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to low.
|
|
2861
|
+
creator_organization_name: OpenAI
|
|
2862
|
+
access: limited
|
|
2863
|
+
release_date: 2025-01-31
|
|
2864
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2865
|
+
|
|
2866
|
+
- name: openai/o3-mini-2025-01-31-high-reasoning-effort
|
|
2867
|
+
display_name: o3-mini (2025-01-31, high reasoning effort)
|
|
2868
|
+
description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to high.
|
|
2869
|
+
creator_organization_name: OpenAI
|
|
2870
|
+
access: limited
|
|
2871
|
+
release_date: 2025-01-31
|
|
2872
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2873
|
+
|
|
2221
2874
|
## Codex Models
|
|
2222
2875
|
# DEPRECATED: Codex models have been shut down on March 23 2023.
|
|
2223
2876
|
|
|
@@ -2462,6 +3115,39 @@ models:
|
|
|
2462
3115
|
release_date: 2024-06-07
|
|
2463
3116
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2464
3117
|
|
|
3118
|
+
- name: qwen/qwen2.5-7b-instruct-turbo
|
|
3119
|
+
display_name: Qwen2.5 Instruct Turbo (7B)
|
|
3120
|
+
description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
|
|
3121
|
+
creator_organization_name: Qwen
|
|
3122
|
+
access: open
|
|
3123
|
+
release_date: 2024-09-19
|
|
3124
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3125
|
+
|
|
3126
|
+
- name: qwen/qwen2.5-7b-instruct
|
|
3127
|
+
display_name: Qwen2.5 Instruct (7B)
|
|
3128
|
+
description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/))
|
|
3129
|
+
creator_organization_name: Qwen
|
|
3130
|
+
access: open
|
|
3131
|
+
release_date: 2024-09-19
|
|
3132
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3133
|
+
|
|
3134
|
+
- name: qwen/qwen2.5-72b-instruct-turbo
|
|
3135
|
+
display_name: Qwen2.5 Instruct Turbo (72B)
|
|
3136
|
+
description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
|
|
3137
|
+
creator_organization_name: Qwen
|
|
3138
|
+
access: open
|
|
3139
|
+
release_date: 2024-09-19
|
|
3140
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3141
|
+
|
|
3142
|
+
- name: qwen/qwq-32b-preview
|
|
3143
|
+
display_name: QwQ (32B Preview)
|
|
3144
|
+
description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
|
|
3145
|
+
creator_organization_name: Alibaba Cloud
|
|
3146
|
+
access: open
|
|
3147
|
+
num_parameters: 32800000000
|
|
3148
|
+
release_date: 2024-11-28
|
|
3149
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3150
|
+
|
|
2465
3151
|
- name: qwen/qwen-vl
|
|
2466
3152
|
display_name: Qwen-VL
|
|
2467
3153
|
description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
|
|
@@ -2478,6 +3164,38 @@ models:
|
|
|
2478
3164
|
release_date: 2023-08-24
|
|
2479
3165
|
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
2480
3166
|
|
|
3167
|
+
- name: qwen/qwen2-vl-7b-instruct
|
|
3168
|
+
display_name: Qwen2-VL Instruct (7B)
|
|
3169
|
+
description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
|
|
3170
|
+
creator_organization_name: Alibaba Group
|
|
3171
|
+
access: open
|
|
3172
|
+
release_date: 2024-08-29
|
|
3173
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3174
|
+
|
|
3175
|
+
- name: qwen/qwen2-vl-72b-instruct
|
|
3176
|
+
display_name: Qwen2-VL Instruct (72B)
|
|
3177
|
+
description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
|
|
3178
|
+
creator_organization_name: Alibaba Group
|
|
3179
|
+
access: open
|
|
3180
|
+
release_date: 2024-08-29
|
|
3181
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
3182
|
+
|
|
3183
|
+
- name: qwen/qwen-audio-chat
|
|
3184
|
+
display_name: Qwen-Audio Chat
|
|
3185
|
+
description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
|
|
3186
|
+
creator_organization_name: Alibaba Cloud
|
|
3187
|
+
access: open
|
|
3188
|
+
release_date: 2023-11-14
|
|
3189
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3190
|
+
|
|
3191
|
+
- name: qwen/qwen2-audio-7b-instruct
|
|
3192
|
+
display_name: Qwen2-Audio Instruct (7B)
|
|
3193
|
+
description: The second version of auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)).
|
|
3194
|
+
creator_organization_name: Alibaba Cloud
|
|
3195
|
+
access: open
|
|
3196
|
+
release_date: 2024-07-15
|
|
3197
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3198
|
+
|
|
2481
3199
|
# SAIL (Sea AI Lab)
|
|
2482
3200
|
- name: sail/sailor-7b
|
|
2483
3201
|
display_name: Sailor (7B)
|
|
@@ -2523,7 +3241,7 @@ models:
|
|
|
2523
3241
|
access: open
|
|
2524
3242
|
num_parameters: 16000000000
|
|
2525
3243
|
release_date: 2022-03-25
|
|
2526
|
-
tags: []
|
|
3244
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
2527
3245
|
|
|
2528
3246
|
# SambaNova
|
|
2529
3247
|
- name: sambanova/sambalingo-thai-base
|
|
@@ -2675,8 +3393,6 @@ models:
|
|
|
2675
3393
|
release_date: 2023-04-20
|
|
2676
3394
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2677
3395
|
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
3396
|
# Stanford
|
|
2681
3397
|
- name: stanford/alpaca-7b
|
|
2682
3398
|
display_name: Alpaca (7B)
|
|
@@ -2772,7 +3488,7 @@ models:
|
|
|
2772
3488
|
access: open
|
|
2773
3489
|
num_parameters: 3000000000
|
|
2774
3490
|
release_date: 2023-05-05
|
|
2775
|
-
|
|
3491
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
2776
3492
|
|
|
2777
3493
|
- name: together/redpajama-incite-base-7b
|
|
2778
3494
|
display_name: RedPajama-INCITE-Base (7B)
|
|
@@ -2823,9 +3539,27 @@ models:
|
|
|
2823
3539
|
access: open
|
|
2824
3540
|
num_parameters: 13000000000
|
|
2825
3541
|
release_date: 2022-09-19
|
|
2826
|
-
tags: []
|
|
3542
|
+
tags: [UNSUPPORTED_MODEL_TAG]
|
|
2827
3543
|
|
|
3544
|
+
# Upstage
|
|
3545
|
+
- name: upstage/solar-pro-preview-instruct
|
|
3546
|
+
display_name: Solar Pro Preview (22B)
|
|
3547
|
+
description: Solar Pro Preview (22B) is an open-weights model for single GPU inference that is a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)).
|
|
3548
|
+
creator_organization_name: Upstage
|
|
3549
|
+
access: open
|
|
3550
|
+
num_parameters: 22000000000
|
|
3551
|
+
release_date: 2024-09-11
|
|
3552
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2828
3553
|
|
|
3554
|
+
- name: upstage/solar-pro-241126
|
|
3555
|
+
display_name: Solar Pro
|
|
3557
|
+
description: Solar Pro is a LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws)).
|
|
3558
|
+
creator_organization_name: Upstage
|
|
3559
|
+
access: limited
|
|
3560
|
+
num_parameters: 22000000000
|
|
3561
|
+
release_date: 2024-11-26
|
|
3562
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
2829
3563
|
|
|
2830
3564
|
# Writer
|
|
2831
3565
|
- name: writer/palmyra-base
|
|
@@ -2928,6 +3662,58 @@ models:
|
|
|
2928
3662
|
# Does not support echo
|
|
2929
3663
|
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
|
|
2930
3664
|
|
|
3665
|
+
- name: writer/palmyra-x-004
|
|
3666
|
+
display_name: Palmyra-X-004
|
|
3667
|
+
description: Palmyra-X-004 language model with a large context window of up to 128,000 tokens that excels in processing and understanding complex tasks.
|
|
3668
|
+
creator_organization_name: Writer
|
|
3669
|
+
access: limited
|
|
3670
|
+
release_date: 2024-09-12
|
|
3671
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3672
|
+
|
|
3673
|
+
- name: writer/palmyra-med-32k
|
|
3674
|
+
display_name: Palmyra-Med 32K (70B)
|
|
3675
|
+
description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
|
|
3676
|
+
creator_organization_name: Writer
|
|
3677
|
+
access: open
|
|
3678
|
+
num_parameters: 70600000000
|
|
3679
|
+
release_date: 2024-07-31
|
|
3680
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3681
|
+
|
|
3682
|
+
- name: writer/palmyra-med
|
|
3683
|
+
display_name: Palmyra-Med (70B)
|
|
3684
|
+
description: Palmyra-Med (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
|
|
3685
|
+
creator_organization_name: Writer
|
|
3686
|
+
access: open
|
|
3687
|
+
num_parameters: 70600000000
|
|
3688
|
+
release_date: 2024-07-31
|
|
3689
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3690
|
+
|
|
3691
|
+
- name: writer/palmyra-fin-32k
|
|
3692
|
+
display_name: Palmyra-Fin 32K (70B)
|
|
3693
|
+
description: Palmyra-Fin 32K (70B) is a model finetuned from Palmyra-X-003 intended for financial applications.
|
|
3694
|
+
creator_organization_name: Writer
|
|
3695
|
+
access: open
|
|
3696
|
+
num_parameters: 70600000000
|
|
3697
|
+
release_date: 2024-07-31
|
|
3698
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3699
|
+
|
|
3700
|
+
- name: writer/palmyra-fin
|
|
3701
|
+
display_name: Palmyra Fin
|
|
3702
|
+
description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
|
|
3703
|
+
creator_organization_name: Writer
|
|
3704
|
+
access: limited
|
|
3705
|
+
release_date: 2024-07-31
|
|
3706
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3707
|
+
|
|
3708
|
+
# xAI
|
|
3709
|
+
|
|
3710
|
+
- name: xai/grok-beta
|
|
3711
|
+
display_name: Grok Beta
|
|
3712
|
+
description: Grok Beta is a model from xAI.
|
|
3713
|
+
creator_organization_name: xAI
|
|
3714
|
+
access: closed
|
|
3715
|
+
release_date: 2024-08-13
|
|
3716
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
2931
3717
|
|
|
2932
3718
|
# Yandex
|
|
2933
3719
|
- name: yandex/yalm
|
|
@@ -3000,3 +3786,286 @@ models:
|
|
|
3000
3786
|
release_date: 2024-04-18
|
|
3001
3787
|
tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
3002
3788
|
|
|
3789
|
+
# Diva Llama
|
|
3790
|
+
- name: stanford/diva-llama
|
|
3791
|
+
display_name: Diva Llama 3 (8B)
|
|
3792
|
+
description: Diva Llama 3 is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It was trained using distillation loss. ([paper](https://arxiv.org/abs/2410.02678))
|
|
3793
|
+
creator_organization_name: Stanford
|
|
3794
|
+
access: open
|
|
3795
|
+
num_parameters: 8000000000
|
|
3796
|
+
release_date: 2024-10-03
|
|
3797
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3798
|
+
|
|
3799
|
+
|
|
3800
|
+
# LLaMA-Omni
|
|
3801
|
+
- name: ictnlp/llama-3.1-8b-omni
|
|
3802
|
+
display_name: LLaMA-Omni (8B)
|
|
3803
|
+
description: The audio-visual multimodal version of the LLaMA 3.1 model ([paper](https://arxiv.org/abs/2409.06666)).
|
|
3804
|
+
creator_organization_name: ICTNLP
|
|
3805
|
+
access: open
|
|
3806
|
+
num_parameters: 8000000000
|
|
3807
|
+
release_date: 2024-09-10
|
|
3808
|
+
tags: [AUDIO_LANGUAGE_MODEL_TAG]
|
|
3809
|
+
|
|
3810
|
+
# Granite - IBM
|
|
3811
|
+
# https://www.ibm.com/granite
|
|
3812
|
+
# https://github.com/ibm-granite/granite-3.0-language-models
|
|
3813
|
+
|
|
3814
|
+
- name: ibm-granite/granite-3.0-2b-base
|
|
3815
|
+
display_name: Granite 3.0 base (2B)
|
|
3816
|
+
description: Granite-3.0-2B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
|
|
3817
|
+
creator_organization_name: IBM
|
|
3818
|
+
access: open
|
|
3819
|
+
num_parameters: 2530000000
|
|
3820
|
+
release_date: 2024-10-21
|
|
3821
|
+
tags: [TEXT_MODEL_TAG]
|
|
3822
|
+
|
|
3823
|
+
- name: ibm-granite/granite-3.0-2b-instruct
|
|
3824
|
+
display_name: Granite 3.0 Instruct (2B)
|
|
3825
|
+
description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
|
|
3826
|
+
creator_organization_name: IBM
|
|
3827
|
+
access: open
|
|
3828
|
+
num_parameters: 2630000000
|
|
3829
|
+
release_date: 2024-10-21
|
|
3830
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3831
|
+
|
|
3832
|
+
- name: ibm-granite/granite-3.0-8b-instruct
|
|
3833
|
+
display_name: Granite 3.0 instruct (8B)
|
|
3834
|
+
description: Granite-3.0-8B-Instruct is a 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
|
|
3835
|
+
creator_organization_name: IBM
|
|
3836
|
+
access: open
|
|
3837
|
+
num_parameters: 8170000000
|
|
3838
|
+
release_date: 2024-10-21
|
|
3839
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3840
|
+
|
|
3841
|
+
- name: ibm-granite/granite-3.0-8b-base
|
|
3842
|
+
display_name: Granite 3.0 base (8B)
|
|
3843
|
+
description: Granite-3.0-8B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
|
|
3844
|
+
creator_organization_name: IBM
|
|
3845
|
+
access: open
|
|
3846
|
+
num_parameters: 8170000000
|
|
3847
|
+
release_date: 2024-10-21
|
|
3848
|
+
tags: [TEXT_MODEL_TAG]
|
|
3849
|
+
|
|
3850
|
+
- name: ibm-granite/granite-3.0-3b-a800m-instruct
|
|
3851
|
+
display_name: Granite 3.0 A800M instruct (3B)
|
|
3852
|
+
description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
|
|
3853
|
+
creator_organization_name: IBM
|
|
3854
|
+
access: open
|
|
3855
|
+
num_parameters: 3370000000
|
|
3856
|
+
release_date: 2024-10-21
|
|
3857
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3858
|
+
|
|
3859
|
+
- name: ibm-granite/granite-3.0-3b-a800m-base
|
|
3860
|
+
display_name: Granite 3.0 A800M base (3B)
|
|
3861
|
+
description: Granite-3.0-3B-A800M-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
|
|
3862
|
+
creator_organization_name: IBM
|
|
3863
|
+
access: open
|
|
3864
|
+
num_parameters: 3370000000
|
|
3865
|
+
release_date: 2024-10-21
|
|
3866
|
+
tags: [TEXT_MODEL_TAG]
|
|
3867
|
+
|
|
3868
|
+
- name: ibm-granite/granite-3.0-1b-a400m-instruct
|
|
3869
|
+
display_name: Granite 3.0 A400M instruct (1B)
|
|
3870
|
+
description: Granite-3.0-1B-A400M-Instruct is a 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
|
|
3871
|
+
creator_organization_name: IBM
|
|
3872
|
+
access: open
|
|
3873
|
+
num_parameters: 1330000000
|
|
3874
|
+
release_date: 2024-10-21
|
|
3875
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3876
|
+
|
|
3877
|
+
- name: ibm-granite/granite-3.0-1b-a400m-base
|
|
3878
|
+
display_name: Granite 3.0 A400M base (1B)
|
|
3879
|
+
description: Granite-3.0-1B-A400M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy.
|
|
3880
|
+
creator_organization_name: IBM
|
|
3881
|
+
access: open
|
|
3882
|
+
num_parameters: 1380000000
|
|
3883
|
+
release_date: 2024-10-21
|
|
3884
|
+
tags: [TEXT_MODEL_TAG]
|
|
3885
|
+
|
|
3886
|
+
- name: maritaca-ai/sabia-7b
|
|
3887
|
+
display_name: Sabia 7B
|
|
3888
|
+
description: Sabia 7B
|
|
3889
|
+
creator_organization_name: MARITACA-AI
|
|
3890
|
+
access: open
|
|
3891
|
+
num_parameters: 6740000000
|
|
3892
|
+
release_date: 2023-11-08
|
|
3893
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3894
|
+
|
|
3895
|
+
# Granite-3.1-8b-base
|
|
3896
|
+
- name: ibm-granite/granite-3.1-8b-base
|
|
3897
|
+
display_name: Granite 3.1 - 8B - Base
|
|
3898
|
+
description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
|
|
3899
|
+
creator_organization_name: IBM-GRANITE
|
|
3900
|
+
access: open
|
|
3901
|
+
num_parameters: 8170000000
|
|
3902
|
+
release_date: 2024-12-18
|
|
3903
|
+
tags: [TEXT_MODEL_TAG]
|
|
3904
|
+
|
|
3905
|
+
# Granite-3.1-8b-instruct
|
|
3906
|
+
- name: ibm-granite/granite-3.1-8b-instruct
|
|
3907
|
+
display_name: Granite 3.1 - 8B - Instruct
|
|
3908
|
+
description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
3909
|
+
creator_organization_name: IBM
|
|
3910
|
+
access: open
|
|
3911
|
+
num_parameters: 8170000000
|
|
3912
|
+
release_date: 2024-12-18
|
|
3913
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3914
|
+
|
|
3915
|
+
# Granite-3.1-2b-instruct
|
|
3916
|
+
- name: ibm-granite/granite-3.1-2b-instruct
|
|
3917
|
+
display_name: Granite 3.1 - 2B - Instruct
|
|
3918
|
+
description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
3919
|
+
creator_organization_name: IBM
|
|
3920
|
+
access: open
|
|
3921
|
+
num_parameters: 2530000000
|
|
3922
|
+
release_date: 2024-12-18
|
|
3923
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3924
|
+
|
|
3925
|
+
# Granite-3.1-2b-base
|
|
3926
|
+
- name: ibm-granite/granite-3.1-2b-base
|
|
3927
|
+
display_name: Granite 3.1 - 2B - Base
|
|
3928
|
+
description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
|
|
3929
|
+
creator_organization_name: IBM-GRANITE
|
|
3930
|
+
access: open
|
|
3931
|
+
num_parameters: 2530000000
|
|
3932
|
+
release_date: 2024-12-18
|
|
3933
|
+
tags: [TEXT_MODEL_TAG]
|
|
3934
|
+
|
|
3935
|
+
# Granite-3.1-3b-a800m-instruct
|
|
3936
|
+
- name: ibm-granite/granite-3.1-3b-a800m-instruct
|
|
3937
|
+
display_name: Granite 3.1 - 3B - A800M - Instruct
|
|
3938
|
+
description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
3939
|
+
creator_organization_name: IBM-GRANITE
|
|
3940
|
+
access: open
|
|
3941
|
+
num_parameters: 3300000000
|
|
3942
|
+
release_date: 2024-12-18
|
|
3943
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3944
|
+
|
|
3945
|
+
# Granite-3.1-3b-a800m-base
|
|
3946
|
+
- name: ibm-granite/granite-3.1-3b-a800m-base
|
|
3947
|
+
display_name: Granite 3.1 - 3B - A800M - Base
|
|
3948
|
+
description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
|
|
3949
|
+
creator_organization_name: IBM-GRANITE
|
|
3950
|
+
access: open
|
|
3951
|
+
num_parameters: 3300000000
|
|
3952
|
+
release_date: 2024-12-18
|
|
3953
|
+
tags: [TEXT_MODEL_TAG]
|
|
3954
|
+
|
|
3955
|
+
# Granite-3.1-1b-a400m-instruct
|
|
3956
|
+
- name: ibm-granite/granite-3.1-1b-a400m-instruct
|
|
3957
|
+
display_name: Granite 3.1 - 1B - A400M - Instruct
|
|
3958
|
+
description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
3959
|
+
creator_organization_name: IBM-GRANITE
|
|
3960
|
+
access: open
|
|
3961
|
+
num_parameters: 1330000000
|
|
3962
|
+
release_date: 2024-12-18
|
|
3963
|
+
tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3964
|
+
|
|
3965
|
+
# Granite-3.1-1b-a400m-base
|
|
3966
|
+
- name: ibm-granite/granite-3.1-1b-a400m-base
|
|
3967
|
+
display_name: Granite 3.1 - 1B - A400M - Base
|
|
3968
|
+
description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
|
|
3969
|
+
creator_organization_name: IBM-GRANITE
|
|
3970
|
+
access: open
|
|
3971
|
+
num_parameters: 1330000000
|
|
3972
|
+
release_date: 2024-12-18
|
|
3973
|
+
tags: [TEXT_MODEL_TAG]
|
|
3974
|
+
|
|
3975
|
+
# DeepSeek-R1-Distill-Llama-3.1-8b
|
|
3976
|
+
- name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
|
|
3977
|
+
display_name: DeepSeek-R1-Distill-Llama-8b
|
|
3978
|
+
description: DeepSeek-R1-Distill-Llama-8b is a reasoning model distilled from DeepSeek-R1 outputs into the Llama 3.1 8B base model.
|
|
3979
|
+
creator_organization_name: DeepSeek
|
|
3980
|
+
access: open
|
|
3981
|
+
num_parameters: 8000000000
|
|
3982
|
+
release_date: 2025-01-20
|
|
3983
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3984
|
+
|
|
3985
|
+
# deepseek-ai/deepseek-coder-6.7b-instruct
|
|
3986
|
+
- name: deepseek-ai/deepseek-coder-6.7b-instruct
|
|
3987
|
+
display_name: DeepSeek-Coder-6.7b-Instruct
|
|
3988
|
+
description: DeepSeek-Coder-6.7b-Instruct is a code model fine-tuned from the DeepSeek-Coder 6.7B base model on instruction data.
|
|
3989
|
+
creator_organization_name: DeepSeek
|
|
3990
|
+
access: open
|
|
3991
|
+
num_parameters: 6740000000
|
|
3992
|
+
release_date: 2025-01-20
|
|
3993
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
3994
|
+
|
|
3995
|
+
# WatsonX - IBM
|
|
3996
|
+
- name: ibm/granite-13b-instruct-v2
|
|
3997
|
+
display_name: Granite 13b instruct v2
|
|
3998
|
+
description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model. The following features were used in the design of the model, a decoder-only model.
|
|
3999
|
+
creator_organization_name: IBM
|
|
4000
|
+
access: limited
|
|
4001
|
+
num_parameters: 13000000000
|
|
4002
|
+
release_date: 2023-11-30
|
|
4003
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4004
|
+
|
|
4005
|
+
- name: ibm/granite-20b-code-instruct-8k
|
|
4006
|
+
display_name: Granite 20b code instruct (8K)
|
|
4007
|
+
description: Granite-20B-Code-Base-8K is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions.
|
|
4008
|
+
creator_organization_name: IBM
|
|
4009
|
+
access: limited
|
|
4010
|
+
num_parameters: 20000000000
|
|
4011
|
+
release_date: 2024-04-18
|
|
4012
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4013
|
+
|
|
4014
|
+
- name: ibm/granite-34b-code-instruct
|
|
4015
|
+
display_name: Granite 34b code instruct
|
|
4016
|
+
description: Granite Base (34B) Code Instruct is a 34B parameter model fine tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
|
|
4017
|
+
creator_organization_name: IBM
|
|
4018
|
+
access: open
|
|
4019
|
+
num_parameters: 34000000000
|
|
4020
|
+
release_date: 2024-06-05
|
|
4021
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4022
|
+
|
|
4023
|
+
|
|
4024
|
+
- name: ibm/granite-3b-code-instruct
|
|
4025
|
+
display_name: Granite 3b code instruct
|
|
4026
|
+
description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
|
|
4027
|
+
creator_organization_name: IBM
|
|
4028
|
+
access: open
|
|
4029
|
+
num_parameters: 3000000000
|
|
4030
|
+
release_date: 2024-06-18
|
|
4031
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4032
|
+
|
|
4033
|
+
- name: ibm/granite-8b-code-instruct
|
|
4034
|
+
display_name: Granite 8b code instruct
|
|
4035
|
+
description: Granite-8B-Code-Instruct-128K is an 8B parameter long-context instruct model fine tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
|
|
4036
|
+
creator_organization_name: IBM
|
|
4037
|
+
access: open
|
|
4038
|
+
num_parameters: 8000000000
|
|
4039
|
+
release_date: 2024-06-18
|
|
4040
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4041
|
+
|
|
4042
|
+
|
|
4043
|
+
|
|
4044
|
+
|
|
4045
|
+
|
|
4046
|
+
|
|
4047
|
+
- name: ibm/granite-3.1-8b-instruct
|
|
4048
|
+
display_name: Granite 3.1 - 8B - Instruct
|
|
4049
|
+
description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
4050
|
+
creator_organization_name: IBM
|
|
4051
|
+
access: open
|
|
4052
|
+
num_parameters: 8170000000
|
|
4053
|
+
release_date: 2024-12-18
|
|
4054
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4055
|
+
|
|
4056
|
+
- name: ibm/granite-3.1-2b-instruct
|
|
4057
|
+
display_name: Granite 3.1 - 2B - Instruct
|
|
4058
|
+
description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
|
|
4059
|
+
creator_organization_name: IBM
|
|
4060
|
+
access: open
|
|
4061
|
+
num_parameters: 2530000000
|
|
4062
|
+
release_date: 2024-12-18
|
|
4063
|
+
tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
|
|
4064
|
+
|
|
4065
|
+
- name: mistralai/mixtral-8x7b-instruct-v0:1
|
|
4066
|
+
display_name: Mixtral 8x7B Instruct on IBM WatsonX
|
|
4067
|
+
description: An 8x7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
|
|
4068
|
+
creator_organization_name: Mistral
|
|
4069
|
+
access: limited
|
|
4070
|
+
release_date: 2023-12-11
|
|
4071
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|