crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/config/model_metadata.yaml
@@ -0,0 +1,2201 @@
1
|
+
# This file defines all the models officially supported by the Helm API.
|
|
2
|
+
# The model names here should match the model names in model_deployments.yaml.
|
|
3
|
+
|
|
4
|
+
# If you want to add a new model, you can technically do it here but we recommend
|
|
5
|
+
# you to do it in prod_env/model_metadata.yaml instead.
|
|
6
|
+
|
|
7
|
+
# Follow the template of this file to add a new model. You can copy paste this to get started:
|
|
8
|
+
# # This file contains the metadata for private models
|
|
9
|
+
# models: [] # Leave empty to disable private models
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
models:
|
|
13
|
+
|
|
14
|
+
- name: simple/model1
|
|
15
|
+
display_name: Simple Model 1
|
|
16
|
+
description: This is a test model.
|
|
17
|
+
creator_organization_name: Helm
|
|
18
|
+
access: open
|
|
19
|
+
release_date: 2023-01-01
|
|
20
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
21
|
+
|
|
22
|
+
# Adobe
|
|
23
|
+
- name: adobe/giga-gan
|
|
24
|
+
display_name: GigaGAN (1B)
|
|
25
|
+
description: GigaGAN is a GAN model that produces high-quality images extremely quickly. The model was trained on text and image pairs from LAION2B-en and COYO-700M. ([paper](https://arxiv.org/abs/2303.05511)).
|
|
26
|
+
creator_organization_name: Adobe
|
|
27
|
+
access: limited
|
|
28
|
+
num_parameters: 1000000000
|
|
29
|
+
release_date: 2023-06-22
|
|
30
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# AI21 Labs
|
|
34
|
+
- name: ai21/j1-jumbo # DEPRECATED
|
|
35
|
+
display_name: J1-Jumbo v1 (178B)
|
|
36
|
+
description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
|
|
37
|
+
creator_organization_name: AI21 Labs
|
|
38
|
+
access: limited
|
|
39
|
+
num_parameters: 178000000000
|
|
40
|
+
release_date: 2021-08-11
|
|
41
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
42
|
+
|
|
43
|
+
- name: ai21/j1-large # DEPRECATED
|
|
44
|
+
display_name: J1-Large v1 (7.5B)
|
|
45
|
+
description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
|
|
46
|
+
creator_organization_name: AI21 Labs
|
|
47
|
+
access: limited
|
|
48
|
+
num_parameters: 7500000000
|
|
49
|
+
release_date: 2021-08-11
|
|
50
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
51
|
+
|
|
52
|
+
- name: ai21/j1-grande # DEPRECATED
|
|
53
|
+
display_name: J1-Grande v1 (17B)
|
|
54
|
+
description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
|
|
55
|
+
creator_organization_name: AI21 Labs
|
|
56
|
+
access: limited
|
|
57
|
+
num_parameters: 17000000000
|
|
58
|
+
release_date: 2022-05-03
|
|
59
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
60
|
+
|
|
61
|
+
- name: ai21/j1-grande-v2-beta # DEPRECATED
|
|
62
|
+
display_name: J1-Grande v2 beta (17B)
|
|
63
|
+
description: Jurassic-1 Grande v2 beta (17B parameters)
|
|
64
|
+
creator_organization_name: AI21 Labs
|
|
65
|
+
access: limited
|
|
66
|
+
num_parameters: 17000000000
|
|
67
|
+
release_date: 2022-10-28
|
|
68
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
69
|
+
|
|
70
|
+
- name: ai21/j2-jumbo
|
|
71
|
+
display_name: Jurassic-2 Jumbo (178B)
|
|
72
|
+
description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
|
|
73
|
+
creator_organization_name: AI21 Labs
|
|
74
|
+
access: limited
|
|
75
|
+
num_parameters: 178000000000
|
|
76
|
+
release_date: 2023-03-09
|
|
77
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
78
|
+
|
|
79
|
+
- name: ai21/j2-large
|
|
80
|
+
display_name: Jurassic-2 Large (7.5B)
|
|
81
|
+
description: Jurassic-2 Large (7.5B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
|
|
82
|
+
creator_organization_name: AI21 Labs
|
|
83
|
+
access: limited
|
|
84
|
+
num_parameters: 7500000000
|
|
85
|
+
release_date: 2023-03-09
|
|
86
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
87
|
+
|
|
88
|
+
- name: ai21/j2-grande
|
|
89
|
+
display_name: Jurassic-2 Grande (17B)
|
|
90
|
+
description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
|
|
91
|
+
creator_organization_name: AI21 Labs
|
|
92
|
+
access: limited
|
|
93
|
+
num_parameters: 17000000000
|
|
94
|
+
release_date: 2023-03-09
|
|
95
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
96
|
+
|
|
97
|
+
# TODO(1524): Change AI21 model names
|
|
98
|
+
# - j2-jumbo -> j2-ultra
|
|
99
|
+
# - j2-grande -> j2-mid
|
|
100
|
+
# - j2-large -> j2-light
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Aleph Alpha
|
|
105
|
+
# Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
|
|
106
|
+
# TODO: add Luminous World when it's released
|
|
107
|
+
- name: AlephAlpha/luminous-base
|
|
108
|
+
display_name: Luminous Base (13B)
|
|
109
|
+
description: Luminous Base (13B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
|
|
110
|
+
creator_organization_name: Aleph Alpha
|
|
111
|
+
access: limited
|
|
112
|
+
num_parameters: 13000000000
|
|
113
|
+
# TODO: get exact release date
|
|
114
|
+
release_date: 2022-01-01
|
|
115
|
+
# Does not support echo
|
|
116
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
117
|
+
|
|
118
|
+
- name: AlephAlpha/luminous-extended
|
|
119
|
+
display_name: Luminous Extended (30B)
|
|
120
|
+
description: Luminous Extended (30B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
|
|
121
|
+
creator_organization_name: Aleph Alpha
|
|
122
|
+
access: limited
|
|
123
|
+
num_parameters: 30000000000
|
|
124
|
+
release_date: 2022-01-01
|
|
125
|
+
# Does not support echo
|
|
126
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
127
|
+
|
|
128
|
+
- name: AlephAlpha/luminous-supreme
|
|
129
|
+
display_name: Luminous Supreme (70B)
|
|
130
|
+
description: Luminous Supreme (70B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
|
|
131
|
+
creator_organization_name: Aleph Alpha
|
|
132
|
+
access: limited
|
|
133
|
+
num_parameters: 70000000000
|
|
134
|
+
release_date: 2022-01-01
|
|
135
|
+
# Does not support echo.
|
|
136
|
+
# Currently, only Luminous-extended and Luminous-base support multimodal inputs
|
|
137
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
138
|
+
|
|
139
|
+
# TODO: Uncomment when luminous-world is released.
|
|
140
|
+
# - name: AlephAlpha/luminous-world # Not released yet.
|
|
141
|
+
# display_name: Luminous World (178B)
|
|
142
|
+
# description: Luminous World (178B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
|
|
143
|
+
# creator_organization_name: Aleph Alpha
|
|
144
|
+
# access: limited
|
|
145
|
+
# num_parameters: TBD
|
|
146
|
+
# release_date: TBD
|
|
147
|
+
# # Does not support echo.
|
|
148
|
+
# tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
149
|
+
|
|
150
|
+
- name: AlephAlpha/m-vader
|
|
151
|
+
display_name: MultiFusion (13B)
|
|
152
|
+
description: MultiFusion is a multimodal, multilingual diffusion model that extend the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296))
|
|
153
|
+
creator_organization_name: Aleph Alpha
|
|
154
|
+
access: limited
|
|
155
|
+
num_parameters: 13000000000
|
|
156
|
+
release_date: 2023-05-24
|
|
157
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Amazon
|
|
161
|
+
# References for Amazon Titan models:
|
|
162
|
+
# - https://aws.amazon.com/bedrock/titan/
|
|
163
|
+
# - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
|
|
164
|
+
# - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
|
|
165
|
+
- name: amazon/titan-text-lite-v1
|
|
166
|
+
display_name: Amazon Titan Text Lite
|
|
167
|
+
description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
|
|
168
|
+
creator_organization_name: Amazon
|
|
169
|
+
access: limited
|
|
170
|
+
release_date: 2023-11-29
|
|
171
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
172
|
+
|
|
173
|
+
- name: amazon/titan-tg1-large
|
|
174
|
+
display_name: Amazon Titan Large
|
|
175
|
+
description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
|
|
176
|
+
creator_organization_name: Amazon
|
|
177
|
+
access: limited
|
|
178
|
+
release_date: 2023-11-29
|
|
179
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
180
|
+
|
|
181
|
+
- name: amazon/titan-text-express-v1
|
|
182
|
+
display_name: Amazon Titan Text Express
|
|
183
|
+
description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
|
|
184
|
+
creator_organization_name: Amazon
|
|
185
|
+
access: limited
|
|
186
|
+
release_date: 2023-11-29
|
|
187
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# Anthropic
|
|
191
|
+
- name: anthropic/claude-v1.3
|
|
192
|
+
display_name: Anthropic Claude v1.3
|
|
193
|
+
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
194
|
+
creator_organization_name: Anthropic
|
|
195
|
+
access: limited
|
|
196
|
+
num_parameters: 52000000000
|
|
197
|
+
release_date: 2023-03-17
|
|
198
|
+
tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
199
|
+
|
|
200
|
+
- name: anthropic/claude-instant-v1
|
|
201
|
+
display_name: Anthropic Claude Instant V1
|
|
202
|
+
description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
|
|
203
|
+
creator_organization_name: Anthropic
|
|
204
|
+
access: limited
|
|
205
|
+
release_date: 2023-03-17
|
|
206
|
+
tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
207
|
+
|
|
208
|
+
- name: anthropic/claude-instant-1.2
|
|
209
|
+
display_name: Anthropic Claude Instant 1.2
|
|
210
|
+
description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
|
|
211
|
+
creator_organization_name: Anthropic
|
|
212
|
+
access: limited
|
|
213
|
+
release_date: 2023-08-09
|
|
214
|
+
tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
215
|
+
|
|
216
|
+
- name: anthropic/claude-2.0
|
|
217
|
+
display_name: Anthropic Claude 2.0
|
|
218
|
+
description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
|
|
219
|
+
creator_organization_name: Anthropic
|
|
220
|
+
access: limited
|
|
221
|
+
release_date: 2023-07-11
|
|
222
|
+
tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
223
|
+
|
|
224
|
+
- name: anthropic/claude-2.1
|
|
225
|
+
display_name: Anthropic Claude 2.1
|
|
226
|
+
description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
|
|
227
|
+
creator_organization_name: Anthropic
|
|
228
|
+
access: limited
|
|
229
|
+
release_date: 2023-11-21
|
|
230
|
+
tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
231
|
+
|
|
232
|
+
- name: anthropic/claude-3-haiku-20240307
|
|
233
|
+
display_name: Claude 3 Haiku (20240307)
|
|
234
|
+
description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
|
|
235
|
+
creator_organization_name: Anthropic
|
|
236
|
+
access: limited
|
|
237
|
+
release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
|
|
238
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
239
|
+
|
|
240
|
+
- name: anthropic/claude-3-sonnet-20240229
|
|
241
|
+
display_name: Claude 3 Sonnet (20240229)
|
|
242
|
+
description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
|
|
243
|
+
creator_organization_name: Anthropic
|
|
244
|
+
access: limited
|
|
245
|
+
release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
|
|
246
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
247
|
+
|
|
248
|
+
- name: anthropic/claude-3-opus-20240229
|
|
249
|
+
display_name: Claude 3 Opus (20240229)
|
|
250
|
+
description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
|
|
251
|
+
creator_organization_name: Anthropic
|
|
252
|
+
access: limited
|
|
253
|
+
release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
|
|
254
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
255
|
+
|
|
256
|
+
# DEPRECATED: Please do not use.
|
|
257
|
+
- name: anthropic/stanford-online-all-v4-s3
|
|
258
|
+
display_name: Anthropic-LM v4-s3 (52B)
|
|
259
|
+
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
260
|
+
creator_organization_name: Anthropic
|
|
261
|
+
access: closed
|
|
262
|
+
num_parameters: 52000000000
|
|
263
|
+
release_date: 2021-12-01
|
|
264
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# Berkeley
|
|
269
|
+
- name: berkeley/koala-13b # NOT SUPPORTED
|
|
270
|
+
display_name: Koala (13B)
|
|
271
|
+
description: Koala (13B) is a chatbot fine-tuned from Llama (13B) on dialogue data gathered from the web. ([blog post](https://bair.berkeley.edu/blog/2023/04/03/koala/))
|
|
272
|
+
creator_organization_name: UC Berkeley
|
|
273
|
+
access: open
|
|
274
|
+
num_parameters: 13000000000
|
|
275
|
+
release_date: 2022-04-03
|
|
276
|
+
tags: [] # TODO: add tags
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# BigScience
|
|
281
|
+
- name: bigscience/bloom
|
|
282
|
+
display_name: BLOOM (176B)
|
|
283
|
+
description: BLOOM (176B parameters) is an autoregressive model trained on 46 natural languages and 13 programming languages ([paper](https://arxiv.org/pdf/2211.05100.pdf)).
|
|
284
|
+
creator_organization_name: BigScience
|
|
285
|
+
access: open
|
|
286
|
+
num_parameters: 176000000000
|
|
287
|
+
release_date: 2022-06-28
|
|
288
|
+
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
|
|
289
|
+
|
|
290
|
+
- name: bigscience/bloomz # NOT SUPPORTED
|
|
291
|
+
display_name: BLOOMZ (176B)
|
|
292
|
+
description: BLOOMZ (176B parameters) is BLOOM that has been fine-tuned on natural language instructions ([details](https://huggingface.co/bigscience/bloomz)).
|
|
293
|
+
creator_organization_name: BigScience
|
|
294
|
+
access: open
|
|
295
|
+
num_parameters: 176000000000
|
|
296
|
+
release_date: 2022-11-03
|
|
297
|
+
tags: [] # TODO: add tags
|
|
298
|
+
|
|
299
|
+
- name: bigscience/t0pp
|
|
300
|
+
display_name: T0pp (11B)
|
|
301
|
+
description: T0pp (11B parameters) is an encoder-decoder model trained on a large set of different tasks specified in natural language prompts ([paper](https://arxiv.org/pdf/2110.08207.pdf)).
|
|
302
|
+
creator_organization_name: BigScience
|
|
303
|
+
access: open
|
|
304
|
+
num_parameters: 11000000000
|
|
305
|
+
release_date: 2021-10-15
|
|
306
|
+
# Does not support echo.
|
|
307
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# BigCode
|
|
312
|
+
- name: bigcode/santacoder
|
|
313
|
+
display_name: SantaCoder (1.1B)
|
|
314
|
+
description: SantaCoder (1.1B parameters) model trained on the Python, Java, and JavaScript subset of The Stack (v1.1) ([model card](https://huggingface.co/bigcode/santacoder)).
|
|
315
|
+
creator_organization_name: BigCode
|
|
316
|
+
access: open
|
|
317
|
+
num_parameters: 1100000000
|
|
318
|
+
release_date: 2023-01-09 # ArXiv submission date
|
|
319
|
+
tags: [CODE_MODEL_TAG]
|
|
320
|
+
|
|
321
|
+
- name: bigcode/starcoder
|
|
322
|
+
display_name: StarCoder (15.5B)
|
|
323
|
+
description: The StarCoder (15.5B parameter) model trained on 80+ programming languages from The Stack (v1.2) ([model card](https://huggingface.co/bigcode/starcoder)).
|
|
324
|
+
creator_organization_name: BigCode
|
|
325
|
+
access: open
|
|
326
|
+
num_parameters: 15500000000
|
|
327
|
+
release_date: 2023-05-09 # ArXiv submission date
|
|
328
|
+
tags: [CODE_MODEL_TAG]
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
# Cerebras Systems
|
|
333
|
+
- name: cerebras/cerebras-gpt-6.7b # NOT SUPPORTED
|
|
334
|
+
display_name: Cerebras GPT (6.7B)
|
|
335
|
+
description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf))
|
|
336
|
+
creator_organization_name: Cerebras
|
|
337
|
+
access: limited
|
|
338
|
+
num_parameters: 6700000000
|
|
339
|
+
release_date: 2023-04-06
|
|
340
|
+
tags: [] # TODO: add tags
|
|
341
|
+
|
|
342
|
+
- name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
|
|
343
|
+
display_name: Cerebras GPT (13B)
|
|
344
|
+
description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf))
|
|
345
|
+
creator_organization_name: Cerebras
|
|
346
|
+
access: limited
|
|
347
|
+
num_parameters: 13000000000
|
|
348
|
+
release_date: 2023-04-06
|
|
349
|
+
tags: [] # TODO: add tags
|
|
+
+
+
+# Cohere
+# Model versioning and the possible versions are not documented here:
+# https://docs.cohere.ai/generate-reference#model-optional.
+# So, instead, we got the names of the models from the Cohere Playground.
+#
+# Note that their tokenizer and model were trained on English text and
+# they do not have a dedicated decode API endpoint, so the adaptation
+# step for language modeling fails for certain Scenarios:
+# the_pile:subset=ArXiv
+# the_pile:subset=Github
+# the_pile:subset=PubMed Central
+
+# TODO: Consider renaming to new model names.
+- name: cohere/xlarge-20220609
+  display_name: Cohere xlarge v20220609 (52.4B)
+  description: Cohere xlarge v20220609 (52.4B parameters)
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 52400000000
+  release_date: 2022-06-09
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: cohere/large-20220720 # DEPRECATED
+  display_name: Cohere large v20220720 (13.1B)
+  description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022.
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 13100000000
+  release_date: 2022-07-20
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: cohere/medium-20220720
+  display_name: Cohere medium v20220720 (6.1B)
+  description: Cohere medium v20220720 (6.1B parameters)
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 6100000000
+  release_date: 2022-07-20
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: cohere/small-20220720 # DEPRECATED
+  display_name: Cohere small v20220720 (410M)
+  description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022.
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 410000000
+  release_date: 2022-07-20
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: cohere/xlarge-20221108
+  display_name: Cohere xlarge v20221108 (52.4B)
+  description: Cohere xlarge v20221108 (52.4B parameters)
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 52400000000
+  release_date: 2022-11-08
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: cohere/medium-20221108 # DEPRECATED
+  display_name: Cohere medium v20221108 (6.1B)
+  description: Cohere medium v20221108 (6.1B parameters)
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 6100000000
+  release_date: 2022-11-08
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: cohere/command-medium-beta # DEPRECATED
+  display_name: Cohere Command beta (6.1B)
+  description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 6100000000
+  release_date: 2022-11-08
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: cohere/command-xlarge-beta # DEPRECATED
+  display_name: Cohere Command beta (52.4B)
+  description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
+  creator_organization_name: Cohere
+  access: limited
+  num_parameters: 52400000000
+  release_date: 2022-11-08
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: cohere/command
+  display_name: Cohere Command
+  description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
+  creator_organization_name: Cohere
+  access: limited
+  release_date: 2023-09-29
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: cohere/command-light
+  display_name: Cohere Command Light
+  description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
+  creator_organization_name: Cohere
+  access: limited
+  release_date: 2023-09-29
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+# Craiyon
+- name: craiyon/dalle-mini
+  display_name: DALL-E mini (0.4B)
+  description: DALL-E mini is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
+  creator_organization_name: Craiyon
+  access: open
+  num_parameters: 400000000
+  release_date: 2022-04-21
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: craiyon/dalle-mega
+  display_name: DALL-E mega (2.6B)
+  description: DALL-E mega is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
+  creator_organization_name: Craiyon
+  access: open
+  num_parameters: 2600000000
+  release_date: 2022-04-21
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+# DeepFloyd
+- name: DeepFloyd/IF-I-M-v1.0
+  display_name: DeepFloyd IF Medium (0.4B)
+  description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+  creator_organization_name: DeepFloyd
+  access: open
+  num_parameters: 400000000
+  release_date: 2023-04-28
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: DeepFloyd/IF-I-L-v1.0
+  display_name: DeepFloyd IF Large (0.9B)
+  description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+  creator_organization_name: DeepFloyd
+  access: open
+  num_parameters: 900000000
+  release_date: 2023-04-28
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: DeepFloyd/IF-I-XL-v1.0
+  display_name: DeepFloyd IF X-Large (4.3B)
+  description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+  creator_organization_name: DeepFloyd
+  access: open
+  num_parameters: 4300000000
+  release_date: 2023-04-28
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+
+# Databricks
+- name: databricks/dolly-v2-3b
+  display_name: Dolly V2 (3B)
+  description: Dolly V2 (3B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-2.8b.
+  creator_organization_name: Databricks
+  access: open
+  num_parameters: 2517652480
+  release_date: 2023-04-12
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: databricks/dolly-v2-7b
+  display_name: Dolly V2 (7B)
+  description: Dolly V2 (7B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-6.9b.
+  creator_organization_name: Databricks
+  access: open
+  num_parameters: 6444163072
+  release_date: 2023-04-12
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: databricks/dolly-v2-12b
+  display_name: Dolly V2 (12B)
+  description: Dolly V2 (12B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b.
+  creator_organization_name: Databricks
+  access: open
+  num_parameters: 11327027200
+  release_date: 2023-04-12
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: databricks/dbrx-instruct
+  display_name: DBRX Instruct
+  description: DBRX is a large language model with a fine-grained mixture-of-experts (MoE) architecture that uses 16 experts and chooses 4. It has 132B total parameters, of which 36B parameters are active on any input. ([blog post](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm))
+  creator_organization_name: Databricks
+  access: open
+  num_parameters: 132000000000
+  release_date: 2024-03-27
+  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+# DeepMind
+- name: deepmind/gopher # NOT SUPPORTED
+  display_name: Gopher (280B)
+  description: Gopher (280B parameters) ([paper](https://arxiv.org/pdf/2112.11446.pdf)).
+  creator_organization_name: DeepMind
+  access: closed
+  num_parameters: 280000000000
+  release_date: 2021-12-08
+  tags: [] # TODO: add tags
+
+- name: deepmind/chinchilla # NOT SUPPORTED
+  display_name: Chinchilla (70B)
+  description: Chinchilla (70B parameters) ([paper](https://arxiv.org/pdf/2203.15556.pdf)).
+  creator_organization_name: DeepMind
+  access: closed
+  num_parameters: 70000000000
+  release_date: 2022-03-31
+  tags: [] # TODO: add tags
+
+
+# Deepseek
+- name: deepseek-ai/deepseek-llm-67b-chat
+  display_name: DeepSeek Chat (67B)
+  description: DeepSeek Chat is an open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned with supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
+  creator_organization_name: DeepSeek
+  access: open
+  num_parameters: 67000000000
+  release_date: 2024-01-05
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+# EleutherAI
+- name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
+  display_name: GPT-J (6B)
+  description: GPT-J (6B parameters) autoregressive language model trained on The Pile ([details](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/)).
+  creator_organization_name: EleutherAI
+  access: open
+  num_parameters: 6000000000
+  release_date: 2021-06-04
+  # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
+
+- name: eleutherai/gpt-neox-20b # Served by GooseAi and Together.
+  display_name: GPT-NeoX (20B)
+  description: GPT-NeoX (20B parameters) autoregressive language model trained on The Pile ([paper](https://arxiv.org/pdf/2204.06745.pdf)).
+  creator_organization_name: EleutherAI
+  access: open
+  num_parameters: 20000000000
+  release_date: 2022-02-02
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+- name: eleutherai/pythia-1b-v0
+  display_name: Pythia (1B)
+  description: Pythia (1B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
+  creator_organization_name: EleutherAI
+  access: open
+  num_parameters: 805736448
+  release_date: 2023-02-13
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: eleutherai/pythia-2.8b-v0
+  display_name: Pythia (2.8B)
+  description: Pythia (2.8B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
+  creator_organization_name: EleutherAI
+  access: open
+  num_parameters: 2517652480
+  release_date: 2023-02-13
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: eleutherai/pythia-6.9b
+  display_name: Pythia (6.9B)
+  description: Pythia (6.9B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
+  creator_organization_name: EleutherAI
+  access: open
+  num_parameters: 6444163072
+  release_date: 2023-02-13
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: eleutherai/pythia-12b-v0
+  display_name: Pythia (12B)
+  description: Pythia (12B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
+  creator_organization_name: EleutherAI
+  access: open
+  num_parameters: 11327027200
+  release_date: 2023-02-13
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+# Google
+- name: google/t5-11b
+  display_name: T5 (11B)
+  description: T5 (11B parameters) is an encoder-decoder model trained on a multi-task mixture, where each task is converted into a text-to-text format ([paper](https://arxiv.org/pdf/1910.10683.pdf)).
+  creator_organization_name: Google
+  access: open
+  num_parameters: 11000000000
+  release_date: 2019-10-23
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
+
+- name: google/ul2
+  display_name: UL2 (20B)
+  description: UL2 (20B parameters) is an encoder-decoder model trained on the C4 corpus. It's similar to T5 but trained with a different objective and slightly different scaling knobs ([paper](https://arxiv.org/pdf/2205.05131.pdf)).
+  creator_organization_name: Google
+  access: open
+  num_parameters: 20000000000
+  release_date: 2022-05-10
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, NLG_PREFIX_TAG]
+
+- name: google/flan-t5-xxl
+  display_name: Flan-T5 (11B)
+  description: Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).
+  creator_organization_name: Google
+  access: open
+  num_parameters: 11000000000
+  release_date: 2022-12-06 # Paper date
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: google/palm # NOT SUPPORTED
+  display_name: PaLM (540B)
+  description: Pathways Language Model (540B parameters) is trained using 6144 TPU v4 chips ([paper](https://arxiv.org/pdf/2204.02311.pdf)).
+  creator_organization_name: Google
+  access: closed
+  num_parameters: 540000000000
+  release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
+  tags: [] # TODO: add tags
+
+# Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
+- name: google/gemini-pro
+  display_name: Gemini Pro
+  description: Gemini Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-12-13
+  tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/gemini-1.0-pro-001
+  display_name: Gemini 1.0 Pro
+  description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-12-13
+  tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+# Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
+- name: google/gemini-pro-vision
+  display_name: Gemini Pro Vision
+  description: Gemini Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-12-13
+  tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG]
+
+- name: google/gemini-1.0-pro-vision-001
+  display_name: Gemini 1.0 Pro Vision
+  description: Gemini 1.0 Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-12-13
+  tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/gemini-1.5-pro-preview-0409
+  display_name: Gemini 1.5 Pro
+  description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2024-04-10
+  tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/gemma-2b
+  display_name: Gemma (2B)
+  # TODO: Fill in Gemma description.
+  description: TBD
+  creator_organization_name: Google
+  access: open
+  release_date: 2024-02-21
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/gemma-2b-it
+  display_name: Gemma Instruct (2B)
+  # TODO: Fill in Gemma description.
+  description: TBD
+  creator_organization_name: Google
+  access: open
+  release_date: 2024-02-21
+  tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: google/gemma-7b
+  display_name: Gemma (7B)
+  # TODO: Fill in Gemma description.
+  description: TBD
+  creator_organization_name: Google
+  access: open
+  release_date: 2024-02-21
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/gemma-7b-it
+  display_name: Gemma Instruct (7B)
+  # TODO: Fill in Gemma description.
+  description: TBD
+  creator_organization_name: Google
+  access: open
+  release_date: 2024-02-21
+  # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
+  tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: google/text-bison@001
+  display_name: PaLM-2 (Bison)
+  description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
+  tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/text-bison@002
+  display_name: PaLM-2 (Bison)
+  description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
+  tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/text-bison-32k
+  display_name: PaLM-2 (Bison)
+  description: The best value PaLM model with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
+  tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/text-unicorn@001
+  display_name: PaLM-2 (Unicorn)
+  description: The largest model in the PaLM family. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-11-30 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
+  tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: google/code-bison@001
+  display_name: Codey PaLM-2 (Bison)
+  description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
+  tags: [CODE_MODEL_TAG]
+
+- name: google/code-bison@002
+  display_name: Codey PaLM-2 (Bison)
+  description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
+  tags: [CODE_MODEL_TAG]
+
+- name: google/code-bison-32k
+  display_name: Codey PaLM-2 (Bison)
+  description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+  creator_organization_name: Google
+  access: limited
+  release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
+  tags: [CODE_MODEL_TAG]
+
+
+
+# HuggingFace
+- name: HuggingFaceM4/idefics-9b
+  display_name: IDEFICS (9B)
+  description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+  creator_organization_name: HuggingFace
+  access: open
+  num_parameters: 9000000000
+  release_date: 2023-08-22
+  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: HuggingFaceM4/idefics-9b-instruct
+  display_name: IDEFICS instruct (9B)
+  description: IDEFICS instruct (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+  creator_organization_name: HuggingFace
+  access: open
+  num_parameters: 9000000000
+  release_date: 2023-08-22
+  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: HuggingFaceM4/idefics-80b
+  display_name: IDEFICS (80B)
+  description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+  creator_organization_name: HuggingFace
+  access: open
+  num_parameters: 80000000000
+  release_date: 2023-08-22
+  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+- name: HuggingFaceM4/idefics-80b-instruct
+  display_name: IDEFICS instruct (80B)
+  description: IDEFICS instruct (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+  creator_organization_name: HuggingFace
+  access: open
+  num_parameters: 80000000000
+  release_date: 2023-08-22
+  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+## Text-to-Image Diffusion Models
+- name: huggingface/dreamlike-diffusion-v1-0
+  display_name: Dreamlike Diffusion v1.0 (1B)
+  description: Dreamlike Diffusion v1.0 is Stable Diffusion v1.5 fine-tuned on high-quality art ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0))
+  creator_organization_name: dreamlike.art
+  access: open
+  num_parameters: 1000000000
+  release_date: 2023-03-08
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/dreamlike-photoreal-v2-0
+  display_name: Dreamlike Photoreal v2.0 (1B)
+  description: Dreamlike Photoreal v2.0 is a photorealistic model based on Stable Diffusion v1.5 ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0))
+  creator_organization_name: dreamlike.art
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-23
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/openjourney-v1-0
+  display_name: Openjourney (1B)
+  description: Openjourney is an open-source Stable Diffusion model fine-tuned on Midjourney images ([HuggingFace model card](https://huggingface.co/prompthero/openjourney))
+  creator_organization_name: PromptHero
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-01 # TODO: get the exact date
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/openjourney-v2-0
+  display_name: Openjourney v2 (1B)
+  description: Openjourney v2 is an open-source Stable Diffusion model fine-tuned on Midjourney images. Openjourney v2 is now referred to as Openjourney v4 on Hugging Face ([HuggingFace model card](https://huggingface.co/prompthero/openjourney-v4)).
+  creator_organization_name: PromptHero
+  access: open
+  num_parameters: 1000000000
+  release_date: 2023-01-01 # TODO: get the exact date
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/promptist-stable-diffusion-v1-4
+  display_name: Promptist + Stable Diffusion v1.4 (1B)
+  description: Trained with human preferences, Promptist optimizes user input into model-preferred prompts for Stable Diffusion v1.4 ([paper](https://arxiv.org/abs/2212.09611))
+  creator_organization_name: Microsoft
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-12-19
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/redshift-diffusion
+  display_name: Redshift Diffusion (1B)
+  description: Redshift Diffusion is an open-source Stable Diffusion model fine-tuned on high-resolution 3D artworks ([HuggingFace model card](https://huggingface.co/nitrosocke/redshift-diffusion))
+  creator_organization_name: nitrosocke
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-29
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-safe-weak
+  display_name: Safe Stable Diffusion weak (1B)
+  description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
+  creator_organization_name: TU Darmstadt
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-09
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-safe-medium
+  display_name: Safe Stable Diffusion medium (1B)
+  description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+  creator_organization_name: TU Darmstadt
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-09
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-safe-strong
+  display_name: Safe Stable Diffusion strong (1B)
+  description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+  creator_organization_name: TU Darmstadt
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-09
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-safe-max
+  display_name: Safe Stable Diffusion max (1B)
+  description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+  creator_organization_name: TU Darmstadt
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-09
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-v1-4
+  display_name: Stable Diffusion v1.4 (1B)
+  description: Stable Diffusion v1.4 is a latent text-to-image diffusion model capable of generating photorealistic images given any text input ([paper](https://arxiv.org/abs/2112.10752))
+  creator_organization_name: Ludwig Maximilian University of Munich CompVis
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-08-01
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-v1-5
+  display_name: Stable Diffusion v1.5 (1B)
+  description: The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned for 595k steps at resolution 512x512 on laion-aesthetics v2 5+, with 10% dropping of the text-conditioning to improve classifier-free guidance sampling ([paper](https://arxiv.org/abs/2112.10752))
+  creator_organization_name: Runway
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-10-20
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-v2-base
+  display_name: Stable Diffusion v2 base (1B)
+  description: The model is trained from scratch for 550k steps at resolution 256x256 on a subset of LAION-5B filtered for explicit pornographic material, using the LAION-NSFW classifier with punsafe=0.1 and an aesthetic score greater than 4.5. Then it is further trained for 850k steps at resolution 512x512 on the same dataset, on images with resolution greater than 512x512 ([paper](https://arxiv.org/abs/2112.10752))
+  creator_organization_name: Stability AI
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-23
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/stable-diffusion-v2-1-base
+  display_name: Stable Diffusion v2.1 base (1B)
+  description: This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base with 220k extra steps, using punsafe=0.98 on the same dataset ([paper](https://arxiv.org/abs/2112.10752))
+  creator_organization_name: Stability AI
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-11-23
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: huggingface/vintedois-diffusion-v0-1
+  display_name: Vintedois (22h) Diffusion model v0.1 (1B)
+  description: Vintedois (22h) Diffusion model v0.1 is Stable Diffusion v1.5 fine-tuned on a large number of high-quality images with simple prompts, so that it generates beautiful images without heavy prompt engineering ([HuggingFace model card](https://huggingface.co/22h/vintedois-diffusion-v0-1))
+  creator_organization_name: 22 Hours
+  access: open
+  num_parameters: 1000000000
+  release_date: 2022-12-27
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: segmind/Segmind-Vega
+  display_name: Segmind Stable Diffusion (0.74B)
+  description: The Segmind-Vega Model is a distilled version of Stable Diffusion XL (SDXL), offering a 70% reduction in size and a 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/Segmind-Vega))
+  creator_organization_name: Segmind
+  access: open
+  num_parameters: 740000000
+  release_date: 2023-12-01
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: segmind/SSD-1B
+  display_name: Segmind Stable Diffusion (1B)
+  description: The Segmind Stable Diffusion Model (SSD-1B) is a distilled, 50% smaller version of Stable Diffusion XL (SDXL), offering a 60% speedup while maintaining high-quality text-to-image generation capabilities. It has been trained on diverse datasets, including Grit and Midjourney scrape data, to enhance its ability to create a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/SSD-1B))
+  creator_organization_name: Segmind
+  access: open
+  num_parameters: 1000000000
+  release_date: 2023-10-20
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+- name: stabilityai/stable-diffusion-xl-base-1.0
+  display_name: Stable Diffusion XL
+  description: Stable Diffusion XL (SDXL) consists of an ensemble-of-experts pipeline for latent diffusion. ([HuggingFace model card](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0))
+  creator_organization_name: Stability AI
+  access: open
+  num_parameters: 6600000000
+  release_date: 2023-07-26
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
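Because the entries in this group all carry `TEXT_TO_IMAGE_MODEL_TAG`, the `tags` list is the natural way to slice the metadata. The snippet below is again only a sketch under the same assumptions as the earlier example (a local `model_metadata.yaml` shaped like these entries; none of this is HELM's own API): it selects the text-to-image models and orders them by parameter count.

```python
# Illustrative only: list the text-to-image entries and sort them by size.
# Assumes a local "model_metadata.yaml" shaped like the entries in this diff;
# the file name and structure are assumptions, not part of the package.
from typing import Any, Dict, List

import yaml


def text_to_image_models(path: str = "model_metadata.yaml") -> List[Dict[str, Any]]:
    with open(path) as f:
        raw = yaml.safe_load(f)
    entries = raw["models"] if isinstance(raw, dict) else raw
    selected = [e for e in entries if "TEXT_TO_IMAGE_MODEL_TAG" in e.get("tags", [])]
    # Entries without num_parameters (e.g. API-backed services) sort last.
    return sorted(selected, key=lambda e: e.get("num_parameters") or float("inf"))


if __name__ == "__main__":
    for e in text_to_image_models():
        print(f'{e["display_name"]:45s} {e.get("num_parameters")}')
```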
+# Kakao
+- name: kakaobrain/mindall-e
+  display_name: minDALL-E (1.3B)
+  description: minDALL-E, named after minGPT, is an autoregressive text-to-image generation model trained on 14 million image-text pairs ([code](https://github.com/kakaobrain/minDALL-E))
+  creator_organization_name: Kakao
+  access: open
+  num_parameters: 1300000000
+  release_date: 2021-12-13
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+# Lexica
+- name: lexica/search-stable-diffusion-1.5
+  display_name: Lexica Search with Stable Diffusion v1.5 (1B)
+  description: Retrieves Stable Diffusion v1.5 images Lexica users generated ([docs](https://lexica.art/docs)).
+  creator_organization_name: Lexica
+  access: open
+  release_date: 2023-01-01
+  tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+
+# Lightning AI
+- name: lightningai/lit-gpt
+  display_name: Lit-GPT
+  description: Lit-GPT is an optimized collection of open-source LLMs for finetuning and inference. It supports Falcon, Llama 2, Vicuna, LongChat, and other top-performing open-source large language models.
+  creator_organization_name: Lightning AI
+  access: open
+  release_date: 2023-04-04
+  tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+# LMSYS
+- name: lmsys/vicuna-7b-v1.3
+  display_name: Vicuna v1.3 (7B)
+  description: Vicuna v1.3 (7B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
+  creator_organization_name: LMSYS
+  access: open
+  num_parameters: 7000000000
+  release_date: 2023-06-22
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+- name: lmsys/vicuna-13b-v1.3
+  display_name: Vicuna v1.3 (13B)
+  description: Vicuna v1.3 (13B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
+  creator_organization_name: LMSYS
+  access: open
+  num_parameters: 13000000000
+  release_date: 2023-06-22
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+
+# Meta
+- name: meta/opt-iml-175b # NOT SUPPORTED
+  display_name: OPT-IML (175B)
+  description: OPT-IML (175B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 175000000000
+  release_date: 2022-12-22
+  tags: [] # TODO: add tags
+
+- name: meta/opt-iml-30b # NOT SUPPORTED
+  display_name: OPT-IML (30B)
+  description: OPT-IML (30B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 30000000000
+  release_date: 2022-12-22
+  tags: [] # TODO: add tags
+
+- name: meta/opt-175b
+  display_name: OPT (175B)
+  description: Open Pre-trained Transformers (175B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 175000000000
+  release_date: 2022-05-02
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+- name: meta/opt-66b
+  display_name: OPT (66B)
+  description: Open Pre-trained Transformers (66B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 66000000000
+  release_date: 2022-05-02
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+- name: meta/opt-6.7b
+  display_name: OPT (6.7B)
+  description: Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 6700000000
+  release_date: 2022-05-02
+  # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
+
+- name: meta/opt-1.3b
+  display_name: OPT (1.3B)
+  description: Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 1300000000
+  release_date: 2022-05-02
+  # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
+
+- name: meta/galactica-120b # NOT SUPPORTED
+  display_name: Galactica (120B)
+  description: Galactica (120B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 120000000000
+  release_date: 2022-11-15
+  tags: [] # TODO: add tags
+
+- name: meta/galactica-30b # NOT SUPPORTED
+  display_name: Galactica (30B)
+  description: Galactica (30B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 30000000000
+  release_date: 2022-11-15
+  tags: [] # TODO: add tags
+
+- name: meta/llama-7b
+  display_name: LLaMA (7B)
+  description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 7000000000
+  release_date: 2023-02-24
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-13b
+  display_name: LLaMA (13B)
+  description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 13000000000
+  release_date: 2023-02-24
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-30b
+  display_name: LLaMA (30B)
+  description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 30000000000
+  release_date: 2023-02-24
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-65b
+  display_name: LLaMA (65B)
+  description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 65000000000
+  release_date: 2023-02-24
+  # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-2-7b
+  display_name: Llama 2 (7B)
+  description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 7000000000
+  release_date: 2023-07-18
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-2-13b
+  display_name: Llama 2 (13B)
+  description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 13000000000
+  release_date: 2023-07-18
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-2-70b
+  display_name: Llama 2 (70B)
+  description: Llama 2 pretrained models are trained on 2 trillion tokens, and have double the context length of Llama 1.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 70000000000
+  release_date: 2023-07-18
+  # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-3-8b
+  display_name: Llama 3 (8B)
+  description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 8000000000
+  release_date: 2024-04-18
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: meta/llama-3-70b
+  display_name: Llama 3 (70B)
+  description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+  creator_organization_name: Meta
+  access: open
+  num_parameters: 70000000000
+  release_date: 2024-04-18
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+# Microsoft/NVIDIA
+- name: microsoft/TNLGv2_530B
+  display_name: TNLG v2 (530B)
+  description: TNLG v2 (530B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
+  creator_organization_name: Microsoft/NVIDIA
+  access: closed
+  num_parameters: 530000000000
+  release_date: 2022-01-28
+  tags: [] # deprecated text model
+
+- name: microsoft/TNLGv2_7B
+  display_name: TNLG v2 (6.7B)
+  description: TNLG v2 (6.7B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
+  creator_organization_name: Microsoft/NVIDIA
+  access: closed
+  num_parameters: 6700000000
+  release_date: 2022-01-28
+  tags: [] # deprecated text model
+
+- name: microsoft/llava-1.5-7b-hf
+  display_name: LLaVA 1.5 (7B)
+  description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+  creator_organization_name: Microsoft
+  access: open
+  num_parameters: 7000000000
+  release_date: 2023-10-05
+  tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+- name: microsoft/llava-1.5-13b-hf
+  display_name: LLaVA 1.5 (13B)
+  description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+  creator_organization_name: Microsoft
+  access: open
+  num_parameters: 13000000000
+  release_date: 2023-10-05
+  tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+
+- name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+  display_name: OpenFlamingo (9B)
+  description: OpenFlamingo is an open-source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and an MPT-7B language model. ([paper](https://arxiv.org/abs/2308.01390))
+  creator_organization_name: OpenFlamingo
+  access: open
+  num_parameters: 9000000000
+  release_date: 2023-08-02
+  tags: [VISION_LANGUAGE_MODEL_TAG, OPEN_FLAMINGO_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+- name: microsoft/phi-2
+  display_name: Phi-2
+  description: Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value).
+  creator_organization_name: Microsoft
+  access: open
+  num_parameters: 2700000000
+  release_date: 2023-10-05
+  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+# 01.AI
+- name: 01-ai/yi-6b
+  display_name: Yi (6B)
+  description: The Yi models are large language models trained from scratch by developers at 01.AI.
+  creator_organization_name: 01.AI
+  access: open
+  num_parameters: 6000000000
+  release_date: 2023-11-02
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+- name: 01-ai/yi-34b
+  display_name: Yi (34B)
+  description: The Yi models are large language models trained from scratch by developers at 01.AI.
+  creator_organization_name: 01.AI
+  access: open
+  num_parameters: 34000000000
+  release_date: 2023-11-02
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+# Allen Institute for AI
+# OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
+- name: allenai/olmo-7b
+  display_name: OLMo (7B)
+  description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+  creator_organization_name: Allen Institute for AI
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-02-01
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: allenai/olmo-7b-twin-2t
+  display_name: OLMo (7B Twin 2T)
+  description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+  creator_organization_name: Allen Institute for AI
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-02-01
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+- name: allenai/olmo-7b-instruct
+  display_name: OLMo (7B Instruct)
+  description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
+  creator_organization_name: Allen Institute for AI
+  access: open
+  num_parameters: 7000000000
+  release_date: 2024-02-01
+  # TODO: Add instruct tag.
+  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
1322
|
+
# Mistral AI
|
|
1323
|
+
- name: mistralai/mistral-7b-v0.1
|
|
1324
|
+
display_name: Mistral v0.1 (7B)
|
|
1325
|
+
description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
|
|
1326
|
+
creator_organization_name: Mistral AI
|
|
1327
|
+
access: open
|
|
1328
|
+
num_parameters: 7300000000
|
|
1329
|
+
release_date: 2023-09-27
|
|
1330
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1331
|
+
|
|
1332
|
+
- name: mistralai/mixtral-8x7b-32kseqlen
|
|
1333
|
+
display_name: Mixtral (8x7B 32K seqlen)
|
|
1334
|
+
description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
|
|
1335
|
+
creator_organization_name: Mistral AI
|
|
1336
|
+
access: open
|
|
1337
|
+
num_parameters: 46700000000
|
|
1338
|
+
release_date: 2023-12-08
|
|
1339
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1340
|
+
|
|
1341
|
+
- name: mistralai/mixtral-8x7b-instruct-v0.1
|
|
1342
|
+
display_name: Mixtral (8x7B Instruct)
|
|
1343
|
+
description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following.
|
|
1344
|
+
creator_organization_name: Mistral AI
|
|
1345
|
+
access: open
|
|
1346
|
+
num_parameters: 46700000000
|
|
1347
|
+
# Blog post: https://mistral.ai/news/mixtral-of-experts/
|
|
1348
|
+
release_date: 2023-12-11
|
|
1349
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
|
|
1350
|
+
|
|
1351
|
+
- name: mistralai/mixtral-8x22b
|
|
1352
|
+
display_name: Mixtral (8x22B)
|
|
1353
|
+
description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1777869263778291896)).
|
|
1354
|
+
creator_organization_name: Mistral AI
|
|
1355
|
+
access: open
|
|
1356
|
+
num_parameters: 176000000000
|
|
1357
|
+
release_date: 2024-04-10
|
|
1358
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
1359
|
+
|
|
1360
|
+
- name: mistralai/bakLlava-v1-hf
|
|
1361
|
+
display_name: BakLLaVA v1 (7B)
|
|
1362
|
+
description: BakLLaVA v1 is a Mistral 7B base augmented with the LLaVA 1.5 architecture. ([blog](https://huggingface.co/llava-hf/bakLlava-v1-hf))
|
|
1363
|
+
creator_organization_name: Mistral AI
|
|
1364
|
+
access: open
|
|
1365
|
+
num_parameters: 7000000000
|
|
1366
|
+
release_date: 2023-10-16
|
|
1367
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
|
|
1368
|
+
|
|
1369
|
+
- name: mistralai/mistral-small-2402
|
|
1370
|
+
display_name: Mistral Small (2402)
|
|
1371
|
+
# TODO: Fill in description
|
|
1372
|
+
description: TBD
|
|
1373
|
+
creator_organization_name: Mistral AI
|
|
1374
|
+
access: limited
|
|
1375
|
+
# Blog post: https://mistral.ai/news/mistral-large/
|
|
1376
|
+
release_date: 2023-02-26
|
|
1377
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
|
|
1378
|
+
|
|
1379
|
+
- name: mistralai/mistral-medium-2312
|
|
1380
|
+
display_name: Mistral Medium (2312)
|
|
1381
|
+
description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
|
|
1382
|
+
creator_organization_name: Mistral AI
|
|
1383
|
+
access: limited
|
|
1384
|
+
release_date: 2023-12-11
|
|
1385
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
|
|
1386
|
+
|
|
1387
|
+
- name: mistralai/mistral-large-2402
|
|
1388
|
+
display_name: Mistral Large (2402)
|
|
1389
|
+
# TODO: Fill in description
|
|
1390
|
+
description: TBD
|
|
1391
|
+
creator_organization_name: Mistral AI
|
|
1392
|
+
access: limited
|
|
1393
|
+
# Blog post: https://mistral.ai/news/mistral-large/
|
|
1394
|
+
release_date: 2023-02-26
|
|
1395
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
|
|
1396
|
+
|
|
1397
|
+
+ # MosaicML
+ - name: mosaicml/mpt-7b
+   display_name: MPT (7B)
+   description: MPT (7B) is a Transformer trained from scratch on 1T tokens of text and code.
+   creator_organization_name: MosaicML
+   access: open
+   num_parameters: 6700000000
+   release_date: 2023-05-05
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: mosaicml/mpt-7b-chat # NOT SUPPORTED
+   display_name: MPT-Chat (7B)
+   description: MPT-Chat (7B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
+   creator_organization_name: MosaicML
+   access: open
+   num_parameters: 6700000000
+   release_date: 2023-05-05
+   tags: [] # TODO: add tags
+
+ - name: mosaicml/mpt-instruct-7b
+   display_name: MPT-Instruct (7B)
+   description: MPT-Instruct (7B) is a model for short-form instruction following. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
+   creator_organization_name: MosaicML
+   access: open
+   num_parameters: 6700000000
+   release_date: 2023-05-05
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: mosaicml/mpt-30b
+   display_name: MPT (30B)
+   description: MPT (30B) is a Transformer trained from scratch on 1T tokens of text and code.
+   creator_organization_name: MosaicML
+   access: open
+   num_parameters: 30000000000
+   release_date: 2023-06-22
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: mosaicml/mpt-30b-chat # NOT SUPPORTED
+   display_name: MPT-Chat (30B)
+   description: MPT-Chat (30B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
+   creator_organization_name: MosaicML
+   access: open
+   num_parameters: 30000000000
+   release_date: 2023-06-22
+   tags: [] # TODO: add tags
+
+ - name: mosaicml/mpt-instruct-30b
+   display_name: MPT-Instruct (30B)
+   description: MPT-Instruct (30B) is a model for short-form instruction following. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
+   creator_organization_name: MosaicML
+   access: open
+   num_parameters: 30000000000
+   release_date: 2023-06-22
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Neurips
+ - name: neurips/local
+   display_name: Neurips Local
+   description: Neurips Local
+   creator_organization_name: Neurips
+   access: open
+   release_date: 2023-06-01
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # NVIDIA
+ - name: nvidia/megatron-gpt2
+   display_name: Megatron GPT2
+   description: GPT-2 implemented in Megatron-LM ([paper](https://arxiv.org/abs/1909.08053)).
+   creator_organization_name: NVIDIA
+   access: open
+   release_date: 2019-09-17 # paper date
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
+
+
+
+ # OpenAI
+
+ ## GPT 2 Models
+ # Not served by OpenAI, instead served by HuggingFace.
+
+ - name: openai/gpt2
+   display_name: GPT-2 (1.5B)
+   description: GPT-2 (1.5B parameters) is a transformer model trained on a large corpus of English text in a self-supervised fashion ([paper](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)).
+   creator_organization_name: OpenAI
+   access: open
+   num_parameters: 1500000000
+   release_date: 2019-02-14
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+ ## GPT 3 Models
+ # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
+
+ - name: openai/davinci-002
+   display_name: davinci-002
+   description: Replacement for the GPT-3 curie and davinci base models.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-08-22
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/babbage-002
+   display_name: babbage-002
+   description: Replacement for the GPT-3 ada and babbage base models.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-08-22
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
+
+ - name: openai/davinci # DEPRECATED
+   display_name: davinci (175B)
+   description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 175000000000
+   release_date: 2020-05-28
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/curie # DEPRECATED
+   display_name: curie (6.7B)
+   description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 6700000000
+   release_date: 2020-05-28
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/babbage # DEPRECATED
+   display_name: babbage (1.3B)
+   description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 1300000000
+   release_date: 2020-05-28
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/ada # DEPRECATED
+   display_name: ada (350M)
+   description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 350000000
+   release_date: 2020-05-28
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-davinci-003 # DEPRECATED
+   display_name: GPT-3.5 (text-davinci-003)
+   description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 175000000000
+   release_date: 2022-11-28
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # TODO: text-davinci-002 supports insertion. Support insertion in our framework.
+ # https://github.com/stanford-crfm/benchmarking/issues/359
+ - name: openai/text-davinci-002 # DEPRECATED
+   display_name: GPT-3.5 (text-davinci-002)
+   description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 175000000000
+   release_date: 2022-01-27
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-davinci-001 # DEPRECATED
+   display_name: GPT-3.5 (text-davinci-001)
+   description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 175000000000
+   release_date: 2022-01-27
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-curie-001 # DEPRECATED
+   display_name: text-curie-001
+   description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 6700000000
+   release_date: 2022-01-27
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-babbage-001 # DEPRECATED
+   display_name: text-babbage-001
+   description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 1300000000
+   release_date: 2022-01-27
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-ada-001 # DEPRECATED
+   display_name: text-ada-001
+   description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 350000000
+   release_date: 2022-01-27
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+ ## GPT 3.5 Turbo Models
+ # ChatGPT: https://openai.com/blog/chatgpt
+
+ - name: openai/gpt-3.5-turbo-instruct
+   display_name: GPT-3.5 Turbo Instruct
+   description: Similar capabilities as GPT-3 era models. Compatible with the legacy Completions endpoint and not Chat Completions.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-09-18
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-0301
+   display_name: GPT-3.5 Turbo (0301)
+   description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-03-01
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-0613
+   display_name: GPT-3.5 Turbo (0613)
+   description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-06-13
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-1106
+   display_name: GPT-3.5 Turbo (1106)
+   description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-11-06.
+   creator_organization_name: OpenAI
+   access: limited
+   # Actual release blog post was published on 2024-01-25:
+   # https://openai.com/blog/new-embedding-models-and-api-updates
+   release_date: 2024-01-25
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-0125
+   display_name: gpt-3.5-turbo-0125
+   description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
+   creator_organization_name: OpenAI
+   access: limited
+   # Release blog post was published on 2024-01-25:
+   # https://openai.com/blog/new-embedding-models-and-api-updates
+   # The actual release date is unclear - it was described as "next week".
+   release_date: 2024-01-25
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-16k-0613
+   display_name: gpt-3.5-turbo-16k-0613
+   description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-06-13
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+ ## GPT 4 Models
+
+ - name: openai/gpt-4-1106-preview
+   display_name: GPT-4 Turbo (1106 preview)
+   description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2023-11-06.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-11-06
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-0314
+   display_name: GPT-4 (0314)
+   description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-03-14.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-03-14
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-32k-0314
+   display_name: gpt-4-32k-0314
+   description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from March 14th 2023.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-03-14
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-0613
+   display_name: GPT-4 (0613)
+   description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-06-13.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-06-13
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-32k-0613
+   display_name: gpt-4-32k-0613
+   description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-06-13.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-06-13
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-0125-preview
+   display_name: GPT-4 Turbo (0125 preview)
+   description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2024-01-25. This snapshot is intended to reduce cases of “laziness” where the model doesn’t complete a task.
+   creator_organization_name: OpenAI
+   access: limited
+   # Actual release blog post was published on 2024-01-25:
+   # https://openai.com/blog/new-embedding-models-and-api-updates
+   release_date: 2024-01-25
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-turbo-2024-04-09
+   display_name: GPT-4 Turbo (2024-04-09)
+   description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2024-04-09
+   tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-vision-preview
+   display_name: GPT-4V (preview)
+   description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat but works well for traditional completions tasks.
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2023-11-06
+   tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ ## Codex Models
+ # DEPRECATED: Codex models have been shut down on March 23 2023.
+
+ - name: openai/code-davinci-002 # DEPRECATED
+   display_name: code-davinci-002
+   description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)).
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2021-07-01 # TODO: Find correct date (this is for v1)
+   tags: [CODE_MODEL_TAG]
+
+ - name: openai/code-davinci-001 # DEPRECATED
+   display_name: code-davinci-001
+   description: code-davinci-001 model
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2021-07-01 # Paper date
+   tags: [CODE_MODEL_TAG]
+
+ - name: openai/code-cushman-001 # DEPRECATED
+   display_name: code-cushman-001 (12B)
+   description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 12000000000
+   release_date: 2021-07-01 # Paper date
+   tags: [CODE_MODEL_TAG]
+
+
+ ## Text Similarity Models
+ # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
+ # The number of parameters is guessed based on the number of parameters of the
+ # corresponding GPT-3 model.
+ # DEPRECATED: Announced on July 06 2023 that first generation embeddings models
+ # will be shut down on January 04 2024.
+
+ - name: openai/text-similarity-davinci-001 # DEPRECATED
+   display_name: text-similarity-davinci-001
+   description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 175000000000
+   release_date: 2022-01-25 # Blog post date
+   tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-similarity-curie-001 # DEPRECATED
+   display_name: text-similarity-curie-001
+   description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 6700000000
+   release_date: 2022-01-25 # Blog post date
+   tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-similarity-babbage-001 # DEPRECATED
+   display_name: text-similarity-babbage-001
+   description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 1300000000
+   release_date: 2022-01-25 # Blog post date
+   tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-similarity-ada-001 # DEPRECATED
+   display_name: text-similarity-ada-001
+   description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 350000000
+   release_date: 2022-01-25 # Blog post date
+   tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-embedding-ada-002
+   display_name: text-embedding-ada-002
+   description: An improved embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/new-and-improved-embedding-model)).
+   creator_organization_name: OpenAI
+   access: limited
+   release_date: 2022-12-15 # Blog post date
+   tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ # Text-to-image models
+ - name: openai/dall-e-2
+   display_name: DALL-E 2 (3.5B)
+   description: DALL-E 2 is an encoder-decoder-based latent diffusion model trained on large-scale paired text-image datasets. The model is available via the OpenAI API ([paper](https://arxiv.org/abs/2204.06125)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 3500000000
+   release_date: 2022-04-13
+   tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3
+   display_name: DALL-E 3
+   description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 0
+   release_date: 2023-11-06
+   tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3-natural
+   display_name: DALL-E 3 (natural style)
+   description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 0
+   release_date: 2023-11-06
+   tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3-hd
+   display_name: DALL-E 3 HD
+   description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 0
+   release_date: 2023-11-06
+   tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3-hd-natural
+   display_name: DALL-E 3 HD (natural style)
+   description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+   creator_organization_name: OpenAI
+   access: limited
+   num_parameters: 0
+   release_date: 2023-11-06
+   tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ # Qwen
+
+ - name: qwen/qwen-7b
+   display_name: Qwen (7B)
+   description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+   creator_organization_name: Qwen
+   access: open
+   release_date: 2024-02-05
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-7b
+   display_name: Qwen1.5 (7B)
+   description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+   creator_organization_name: Qwen
+   access: open
+   release_date: 2024-02-05
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-14b
+   display_name: Qwen1.5 (14B)
+   description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-14B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+   creator_organization_name: Qwen
+   access: open
+   release_date: 2024-02-05
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-32b
+   display_name: Qwen1.5 (32B)
+   description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-32B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+   creator_organization_name: Qwen
+   access: open
+   release_date: 2024-02-05
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-72b
+   display_name: Qwen1.5 (72B)
+   description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-72B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+   creator_organization_name: Qwen
+   access: open
+   release_date: 2024-02-05
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen-vl
+   display_name: Qwen-VL
+   description: Visual multimodal version of the Qwen large model series ([paper](https://arxiv.org/abs/2308.12966)).
+   creator_organization_name: Alibaba Cloud
+   access: open
+   release_date: 2023-08-24
+   tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ - name: qwen/qwen-vl-chat
+   display_name: Qwen-VL Chat
+   description: Chat version of the visual multimodal model Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
+   creator_organization_name: Alibaba Cloud
+   access: open
+   release_date: 2023-08-24
+   tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ # Salesforce
+ - name: salesforce/codegen # NOT SUPPORTED
+   display_name: CodeGen (16B)
+   description: CodeGen (16B parameters) is an open dense code model trained for multi-turn program synthesis ([paper](https://arxiv.org/pdf/2203.13474.pdf)).
+   creator_organization_name: Salesforce
+   access: open
+   num_parameters: 16000000000
+   release_date: 2022-03-25
+   tags: [] # TODO: add tags
+
+
+
+ # Stability AI
+ - name: stabilityai/stablelm-base-alpha-3b
+   display_name: StableLM-Base-Alpha (3B)
+   description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models.
+   creator_organization_name: Stability AI
+   access: open
+   num_parameters: 3000000000
+   release_date: 2023-04-20
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: stabilityai/stablelm-base-alpha-7b
+   display_name: StableLM-Base-Alpha (7B)
+   description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models.
+   creator_organization_name: Stability AI
+   access: open
+   num_parameters: 7000000000
+   release_date: 2023-04-20
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Stanford
+ - name: stanford/alpaca-7b
+   display_name: Alpaca (7B)
+   description: Alpaca 7B is a model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations.
+   creator_organization_name: Stanford
+   access: open
+   num_parameters: 7000000000
+   release_date: 2023-03-13
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+
+ # TII UAE
+ - name: tiiuae/falcon-7b
+   display_name: Falcon (7B)
+   description: Falcon-7B is a 7B-parameter causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora.
+   creator_organization_name: TII UAE
+   access: open
+   num_parameters: 7000000000
+   release_date: 2023-03-15
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: tiiuae/falcon-7b-instruct
+   display_name: Falcon-Instruct (7B)
+   description: Falcon-7B-Instruct is a 7B-parameter causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets.
+   creator_organization_name: TII UAE
+   access: open
+   num_parameters: 7000000000
+   release_date: 2023-03-15
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: tiiuae/falcon-40b
+   display_name: Falcon (40B)
+   description: Falcon-40B is a 40B-parameter causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora.
+   creator_organization_name: TII UAE
+   access: open
+   num_parameters: 40000000000
+   release_date: 2023-05-25
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: tiiuae/falcon-40b-instruct
+   display_name: Falcon-Instruct (40B)
+   description: Falcon-40B-Instruct is a 40B-parameter causal decoder-only model built by TII based on Falcon-40B and finetuned on a mixture of chat/instruct datasets.
+   creator_organization_name: TII UAE
+   access: open
+   num_parameters: 40000000000
+   release_date: 2023-05-25
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Together
+ - name: together/gpt-jt-6b-v1
+   display_name: GPT-JT (6B)
+   description: GPT-JT (6B parameters) is a fork of GPT-J ([blog post](https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai)).
+   creator_organization_name: Together
+   access: open
+   num_parameters: 6700000000
+   release_date: 2022-11-29
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/gpt-neoxt-chat-base-20b
+   display_name: GPT-NeoXT-Chat-Base (20B)
+   description: GPT-NeoXT-Chat-Base (20B) is fine-tuned from GPT-NeoX, serving as a base model for developing open-source chatbots.
+   creator_organization_name: Together
+   access: open
+   num_parameters: 20000000000
+   release_date: 2023-03-08
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG]
+
+ - name: together/redpajama-incite-base-3b-v1
+   display_name: RedPajama-INCITE-Base-v1 (3B)
+   description: RedPajama-INCITE-Base-v1 (3B parameters) is a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+   creator_organization_name: Together
+   access: open
+   num_parameters: 3000000000
+   release_date: 2023-05-05
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/redpajama-incite-instruct-3b-v1
+   display_name: RedPajama-INCITE-Instruct-v1 (3B)
+   description: RedPajama-INCITE-Instruct-v1 (3B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+   creator_organization_name: Together
+   access: open
+   num_parameters: 3000000000
+   release_date: 2023-05-05
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/redpajama-incite-chat-3b-v1 # NOT SUPPORTED
+   display_name: RedPajama-INCITE-Chat-v1 (3B)
+   description: RedPajama-INCITE-Chat-v1 (3B parameters) is a model fine-tuned on OASST1 and Dolly2 to enhance chatting ability. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+   creator_organization_name: Together
+   access: open
+   num_parameters: 3000000000
+   release_date: 2023-05-05
+   tags: [] # TODO: add tags
+
+ - name: together/redpajama-incite-base-7b
+   display_name: RedPajama-INCITE-Base (7B)
+   description: RedPajama-INCITE-Base (7B parameters) is a 7 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+   creator_organization_name: Together
+   access: open
+   num_parameters: 7000000000
+   release_date: 2023-05-05
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/redpajama-incite-instruct-7b
+   display_name: RedPajama-INCITE-Instruct (7B)
+   description: RedPajama-INCITE-Instruct (7B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base (7B), a 7 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+   creator_organization_name: Together
+   access: open
+   num_parameters: 7000000000
+   release_date: 2023-05-05
+   tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Tsinghua
+
+ - name: thudm/cogview2
+   display_name: CogView2 (6B)
+   description: CogView2 is a hierarchical transformer (6B-9B-9B parameters) for text-to-image generation that supports both English and Chinese input text ([paper](https://arxiv.org/abs/2105.13290)).
+   creator_organization_name: Tsinghua
+   access: open
+   num_parameters: 6000000000
+   release_date: 2022-06-15
+   tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: tsinghua/glm
+   display_name: GLM (130B)
+   description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using the General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)).
+   creator_organization_name: Tsinghua
+   access: open
+   num_parameters: 130000000000
+   release_date: 2022-08-04
+   # Inference with echo=True is not feasible -- in the prompt encoding phase, they use
+   # bidirectional attention and do not perform predictions on them.
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
+
+ - name: tsinghua/codegeex # NOT SUPPORTED
+   display_name: CodeGeeX (13B)
+   description: CodeGeeX (13B parameters) is an open dense code model trained on more than 20 programming languages on a corpus of more than 850B tokens ([blog](http://keg.cs.tsinghua.edu.cn/codegeex/)).
+   creator_organization_name: Tsinghua
+   access: open
+   num_parameters: 13000000000
+   release_date: 2022-09-19
+   tags: [] # TODO: add tags
+
+
+
+ # Writer
+ - name: writer/palmyra-base
+   display_name: Palmyra Base (5B)
+   description: Palmyra Base (5B)
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 5000000000
+   release_date: 2022-10-13
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-large
+   display_name: Palmyra Large (20B)
+   description: Palmyra Large (20B)
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 20000000000
+   release_date: 2022-12-23
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-instruct-30
+   deprecated: true # Internal error
+   display_name: InstructPalmyra (30B)
+   description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans.
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 30000000000
+   release_date: 2023-02-16
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-e
+   deprecated: true # Internal error
+   display_name: Palmyra E (30B)
+   description: Palmyra E (30B)
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 30000000000
+   release_date: 2023-03-03
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/silk-road
+   display_name: Silk Road (35B)
+   description: Silk Road (35B)
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 35000000000
+   release_date: 2023-04-13
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x
+   display_name: Palmyra X (43B)
+   description: Palmyra-X (43B parameters) is trained to adhere to instructions using human feedback and utilizes a technique called multiquery attention. Furthermore, a new feature called 'self-instruct' has been introduced, which includes the implementation of an early stopping criterion specifically designed for minimal instruction tuning ([paper](https://dev.writer.com/docs/becoming-self-instruct-introducing-early-stopping-criteria-for-minimal-instruct-tuning)).
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 43000000000
+   release_date: 2023-06-11
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x-v2
+   display_name: Palmyra X V2 (33B)
+   description: Palmyra-X V2 (33B parameters) is a Transformer-based model trained on extremely large-scale pre-training data of more than 2 trillion tokens. The pre-training data types are diverse and cover a wide range of areas, and training used FlashAttention-2.
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 33000000000
+   release_date: 2023-12-01
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x-v3
+   display_name: Palmyra X V3 (72B)
+   description: Palmyra-X V3 (72B parameters) is a Transformer-based model trained on extremely large-scale pre-training data. It is trained via unsupervised learning and DPO and uses multiquery attention.
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 72000000000
+   release_date: 2023-12-01
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x-32k
+   display_name: Palmyra X-32K (33B)
+   description: Palmyra-X-32K (33B parameters) is a Transformer-based model trained on large-scale pre-training data. The pre-training data types are diverse and cover a wide range of areas, and they are used in conjunction with an alignment mechanism to extend the context window.
+   creator_organization_name: Writer
+   access: limited
+   num_parameters: 33000000000
+   release_date: 2023-12-01
+   # Does not support echo
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Yandex
+ - name: yandex/yalm
+   display_name: YaLM (100B)
+   description: YaLM (100B parameters) is an autoregressive language model trained on English and Russian text ([GitHub](https://github.com/yandex/YaLM-100B)).
+   creator_organization_name: Yandex
+   access: open
+   num_parameters: 100000000000
+   release_date: 2022-06-23
+   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
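
Every entry added above follows the same key/value schema (name, display_name, description, creator_organization_name, access, num_parameters, release_date, tags). As a minimal sketch of how one of these YAML entries could be parsed, the snippet below loads an example entry with PyYAML into a plain dataclass; the `ModelEntry` class and the loading code are illustrative assumptions for this diff, not the package's own API.

```python
from dataclasses import dataclass, field
from datetime import date
from typing import List, Optional

import yaml  # assumption: PyYAML is installed

@dataclass
class ModelEntry:
    """Illustrative mirror of the keys used by the entries above."""
    name: str
    display_name: str
    description: str
    creator_organization_name: str
    access: str                          # "open" or "limited" in the listing above
    release_date: Optional[date] = None  # PyYAML parses ISO dates into datetime.date
    num_parameters: Optional[int] = None
    tags: List[str] = field(default_factory=list)

EXAMPLE = """
- name: mistralai/mistral-7b-v0.1
  display_name: Mistral v0.1 (7B)
  description: Mistral 7B is a 7.3B parameter transformer model.
  creator_organization_name: Mistral AI
  access: open
  num_parameters: 7300000000
  release_date: 2023-09-27
  tags: [TEXT_MODEL_TAG]
"""

# Each list item is a dict whose keys match the dataclass fields.
entries = [ModelEntry(**raw) for raw in yaml.safe_load(EXAMPLE)]
print(entries[0].name, entries[0].num_parameters)
```

The tag strings (e.g. TEXT_MODEL_TAG) are kept as plain identifiers here; in the package they are symbolic constants resolved elsewhere.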