crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crfm-helm might be problematic.
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
- crfm_helm-0.5.1.dist-info/RECORD +654 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +31 -3
- helm/benchmark/adaptation/adapters/adapter.py +2 -2
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/gender_perturbation.py +1 -1
- helm/benchmark/augmentations/perturbation.py +25 -3
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/test_perturbation.py +41 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/config_registry.py +7 -1
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +20 -7
- helm/benchmark/metrics/basic_metrics.py +169 -664
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -110
- helm/benchmark/metrics/dry_run_metrics.py +2 -2
- helm/benchmark/metrics/efficiency_metrics.py +213 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +89 -0
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +2 -2
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -2
- helm/benchmark/metrics/test_classification_metrics.py +8 -5
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +74 -0
- helm/benchmark/model_metadata_registry.py +41 -1
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +26 -10
- helm/benchmark/presentation/schema.py +15 -40
- helm/benchmark/presentation/summarize.py +119 -79
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +1 -2
- helm/benchmark/presentation/test_summarize.py +3 -3
- helm/benchmark/run.py +54 -26
- helm/benchmark/run_expander.py +205 -35
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +163 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/run_specs/classic_run_specs.py +1510 -0
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +757 -0
- helm/benchmark/runner.py +51 -57
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +1 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +6 -2
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +1 -1
- helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
- helm/benchmark/scenarios/scenario.py +4 -0
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +6 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
- helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
- helm/benchmark/server.py +24 -1
- helm/benchmark/slurm_runner.py +70 -49
- helm/benchmark/static/benchmarking.js +1 -1
- helm/benchmark/static/schema_classic.yaml +258 -1066
- helm/benchmark/static/schema_image2structure.yaml +304 -0
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +2 -227
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
- helm/benchmark/static/schema_vlm.yaml +823 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
- helm/benchmark/static_build/assets/index-878a1094.css +1 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -44
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +4 -1
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +3 -9
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +233 -18
- helm/{proxy/clients → clients}/auto_client.py +59 -31
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +65 -7
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +4 -11
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +5 -5
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +5 -7
- helm/{proxy/clients → clients}/huggingface_client.py +43 -64
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
- helm/{proxy/clients → clients}/megatron_client.py +5 -5
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +301 -0
- helm/{proxy/clients → clients}/palmyra_client.py +6 -8
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +13 -15
- helm/clients/test_client.py +100 -0
- helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +20 -8
- helm/{proxy/clients → clients}/together_client.py +104 -73
- helm/clients/vertexai_client.py +400 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
- helm/clients/vision_language/huggingface_vlm_client.py +111 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +16 -4
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +1 -1
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +33 -3
- helm/common/key_value_store.py +35 -4
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/request.py +15 -17
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +1 -1
- helm/config/model_deployments.yaml +1159 -538
- helm/config/model_metadata.yaml +868 -41
- helm/config/tokenizer_configs.yaml +149 -43
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +8 -6
- helm/proxy/example_queries.py +29 -17
- helm/proxy/server.py +70 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +96 -16
- helm/proxy/services/service.py +30 -0
- helm/proxy/services/test_remote_service.py +4 -3
- helm/proxy/services/test_service.py +0 -12
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.4.0.dist-info/RECORD +0 -397
- helm/benchmark/run_specs.py +0 -2762
- helm/benchmark/test_model_deployment_definition.py +0 -92
- helm/benchmark/test_model_properties.py +0 -1570
- helm/benchmark/vlm_run_specs.py +0 -97
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/huggingface_window_service.py +0 -60
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/common/cache_utils.py +0 -14
- helm/proxy/clients/aleph_alpha_client.py +0 -95
- helm/proxy/clients/goose_ai_client.py +0 -99
- helm/proxy/clients/microsoft_client.py +0 -180
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/simple_client.py +0 -60
- helm/proxy/clients/test_client.py +0 -49
- helm/proxy/clients/vertexai_client.py +0 -115
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
- helm/proxy/token_counters/test_openai_token_counter.py +0 -81
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
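The rename entries above move the client and tokenizer packages out of helm.proxy to the top level of the package (helm/clients/, helm/tokenizers/), and the old helm/proxy/clients and helm/proxy/tokenizers packages no longer ship in 0.5.1. For downstream code that imports these modules directly, only the import path changes; a minimal sketch, assuming a hypothetical caller script (module names are taken from the renamed paths above, no class names are implied):

    # Hypothetical downstream script; it only illustrates the import-path change
    # implied by the renames above (crfm-helm 0.4.0 -> 0.5.1).

    # crfm-helm 0.4.0: client and tokenizer modules lived under helm.proxy
    # from helm.proxy.clients import together_client
    # from helm.proxy.tokenizers import huggingface_tokenizer

    # crfm-helm 0.5.1: the same modules live at the package top level
    from helm.clients import together_client            # helm/clients/together_client.py
    from helm.tokenizers import huggingface_tokenizer   # helm/tokenizers/huggingface_tokenizer.py

Code that still imports from helm.proxy.clients or helm.proxy.tokenizers will fail against 0.5.1, since those subpackages were removed in this release.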
helm/config/model_metadata.yaml
CHANGED
|
@@ -19,6 +19,17 @@ models:
|
|
|
19
19
|
release_date: 2023-01-01
|
|
20
20
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
21
21
|
|
|
22
|
+
# Adobe
|
|
23
|
+
- name: adobe/giga-gan
|
|
24
|
+
display_name: GigaGAN (1B)
|
|
25
|
+
description: GigaGAN is a GAN model that produces high-quality images extremely quickly. The model was trained on text and image pairs from LAION2B-en and COYO-700M. ([paper](https://arxiv.org/abs/2303.05511)).
|
|
26
|
+
creator_organization_name: Adobe
|
|
27
|
+
access: limited
|
|
28
|
+
num_parameters: 1000000000
|
|
29
|
+
release_date: 2023-06-22
|
|
30
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
31
|
+
|
|
32
|
+
|
|
22
33
|
# AI21 Labs
|
|
23
34
|
- name: ai21/j1-jumbo # DEPRECATED
|
|
24
35
|
display_name: J1-Jumbo v1 (178B)
|
|
@@ -102,7 +113,7 @@ models:
|
|
|
102
113
|
# TODO: get exact release date
|
|
103
114
|
release_date: 2022-01-01
|
|
104
115
|
# Does not support echo
|
|
105
|
-
tags: [TEXT_MODEL_TAG,
|
|
116
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
106
117
|
|
|
107
118
|
- name: AlephAlpha/luminous-extended
|
|
108
119
|
display_name: Luminous Extended (30B)
|
|
@@ -112,7 +123,7 @@ models:
|
|
|
112
123
|
num_parameters: 30000000000
|
|
113
124
|
release_date: 2022-01-01
|
|
114
125
|
# Does not support echo
|
|
115
|
-
tags: [TEXT_MODEL_TAG,
|
|
126
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
116
127
|
|
|
117
128
|
- name: AlephAlpha/luminous-supreme
|
|
118
129
|
display_name: Luminous Supreme (70B)
|
|
@@ -122,7 +133,7 @@ models:
|
|
|
122
133
|
num_parameters: 70000000000
|
|
123
134
|
release_date: 2022-01-01
|
|
124
135
|
# Does not support echo.
|
|
125
|
-
#
|
|
136
|
+
# Currently, only Luminous-extended and Luminous-base support multimodal inputs
|
|
126
137
|
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
127
138
|
|
|
128
139
|
# TODO: Uncomment when luminous-world is released.
|
|
@@ -135,12 +146,50 @@ models:
|
|
|
135
146
|
# release_date: TBD
|
|
136
147
|
# # Does not support echo.
|
|
137
148
|
# tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
149
|
+
|
|
150
|
+
- name: AlephAlpha/m-vader
|
|
151
|
+
display_name: MultiFusion (13B)
|
|
152
|
+
description: MultiFusion is a multimodal, multilingual diffusion model that extend the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296))
|
|
153
|
+
creator_organization_name: Aleph Alpha
|
|
154
|
+
access: limited
|
|
155
|
+
num_parameters: 13000000000
|
|
156
|
+
release_date: 2023-05-24
|
|
157
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
158
|
+
|
|
138
159
|
|
|
160
|
+
# Amazon
|
|
161
|
+
# References for Amazon Titan models:
|
|
162
|
+
# - https://aws.amazon.com/bedrock/titan/
|
|
163
|
+
# - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
|
|
164
|
+
# - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
|
|
165
|
+
- name: amazon/titan-text-lite-v1
|
|
166
|
+
display_name: Amazon Titan Text Lite
|
|
167
|
+
description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
|
|
168
|
+
creator_organization_name: Amazon
|
|
169
|
+
access: limited
|
|
170
|
+
release_date: 2023-11-29
|
|
171
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
172
|
+
|
|
173
|
+
- name: amazon/titan-tg1-large
|
|
174
|
+
display_name: Amazon Titan Large
|
|
175
|
+
description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
|
|
176
|
+
creator_organization_name: Amazon
|
|
177
|
+
access: limited
|
|
178
|
+
release_date: 2023-11-29
|
|
179
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
180
|
+
|
|
181
|
+
- name: amazon/titan-text-express-v1
|
|
182
|
+
display_name: Amazon Titan Text Express
|
|
183
|
+
description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
|
|
184
|
+
creator_organization_name: Amazon
|
|
185
|
+
access: limited
|
|
186
|
+
release_date: 2023-11-29
|
|
187
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
139
188
|
|
|
140
189
|
|
|
141
190
|
# Anthropic
|
|
142
191
|
- name: anthropic/claude-v1.3
|
|
143
|
-
display_name:
|
|
192
|
+
display_name: Claude v1.3
|
|
144
193
|
description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
|
|
145
194
|
creator_organization_name: Anthropic
|
|
146
195
|
access: limited
|
|
@@ -149,7 +198,7 @@ models:
|
|
|
149
198
|
tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
150
199
|
|
|
151
200
|
- name: anthropic/claude-instant-v1
|
|
152
|
-
display_name:
|
|
201
|
+
display_name: Claude Instant V1
|
|
153
202
|
description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
|
|
154
203
|
creator_organization_name: Anthropic
|
|
155
204
|
access: limited
|
|
@@ -157,7 +206,7 @@ models:
|
|
|
157
206
|
tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
158
207
|
|
|
159
208
|
- name: anthropic/claude-instant-1.2
|
|
160
|
-
display_name:
|
|
209
|
+
display_name: Claude Instant 1.2
|
|
161
210
|
description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
|
|
162
211
|
creator_organization_name: Anthropic
|
|
163
212
|
access: limited
|
|
@@ -165,7 +214,7 @@ models:
|
|
|
165
214
|
tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
166
215
|
|
|
167
216
|
- name: anthropic/claude-2.0
|
|
168
|
-
display_name:
|
|
217
|
+
display_name: Claude 2.0
|
|
169
218
|
description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
|
|
170
219
|
creator_organization_name: Anthropic
|
|
171
220
|
access: limited
|
|
@@ -173,13 +222,37 @@ models:
|
|
|
173
222
|
tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
174
223
|
|
|
175
224
|
- name: anthropic/claude-2.1
|
|
176
|
-
display_name:
|
|
225
|
+
display_name: Claude 2.1
|
|
177
226
|
description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
|
|
178
227
|
creator_organization_name: Anthropic
|
|
179
228
|
access: limited
|
|
180
229
|
release_date: 2023-11-21
|
|
181
230
|
tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
182
231
|
|
|
232
|
+
- name: anthropic/claude-3-haiku-20240307
|
|
233
|
+
display_name: Claude 3 Haiku (20240307)
|
|
234
|
+
description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
|
|
235
|
+
creator_organization_name: Anthropic
|
|
236
|
+
access: limited
|
|
237
|
+
release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
|
|
238
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
239
|
+
|
|
240
|
+
- name: anthropic/claude-3-sonnet-20240229
|
|
241
|
+
display_name: Claude 3 Sonnet (20240229)
|
|
242
|
+
description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
|
|
243
|
+
creator_organization_name: Anthropic
|
|
244
|
+
access: limited
|
|
245
|
+
release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
|
|
246
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
247
|
+
|
|
248
|
+
- name: anthropic/claude-3-opus-20240229
|
|
249
|
+
display_name: Claude 3 Opus (20240229)
|
|
250
|
+
description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
|
|
251
|
+
access: limited
|
|
252
|
+
creator_organization_name: Anthropic
|
|
253
|
+
release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
|
|
254
|
+
tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
255
|
+
|
|
183
256
|
# DEPRECATED: Please do not use.
|
|
184
257
|
- name: anthropic/stanford-online-all-v4-s3
|
|
185
258
|
display_name: Anthropic-LM v4-s3 (52B)
|
|
@@ -378,6 +451,52 @@ models:
|
|
|
378
451
|
release_date: 2023-09-29
|
|
379
452
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
380
453
|
|
|
454
|
+
# Craiyon
|
|
455
|
+
- name: craiyon/dalle-mini
|
|
456
|
+
display_name: DALL-E mini (0.4B)
|
|
457
|
+
description: DALL-E mini is an open-source text-to-image model that attempt to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
|
|
458
|
+
creator_organization_name: Craiyon
|
|
459
|
+
access: open
|
|
460
|
+
num_parameters: 400000000
|
|
461
|
+
release_date: 2022-04-21
|
|
462
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
463
|
+
|
|
464
|
+
- name: craiyon/dalle-mega
|
|
465
|
+
display_name: DALL-E mega (2.6B)
|
|
466
|
+
description: DALL-E mega is an open-source text-to-image model that attempt to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
|
|
467
|
+
creator_organization_name: Craiyon
|
|
468
|
+
access: open
|
|
469
|
+
num_parameters: 2600000000
|
|
470
|
+
release_date: 2022-04-21
|
|
471
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
472
|
+
|
|
473
|
+
# DeepFloyd
|
|
474
|
+
- name: DeepFloyd/IF-I-M-v1.0
|
|
475
|
+
display_name: DeepFloyd IF Medium (0.4B)
|
|
476
|
+
description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
|
|
477
|
+
creator_organization_name: DeepFloyd
|
|
478
|
+
access: open
|
|
479
|
+
num_parameters: 400000000
|
|
480
|
+
release_date: 2023-04-28
|
|
481
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
482
|
+
|
|
483
|
+
- name: DeepFloyd/IF-I-L-v1.0
|
|
484
|
+
display_name: DeepFloyd IF Large (0.9B)
|
|
485
|
+
description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
|
|
486
|
+
creator_organization_name: DeepFloyd
|
|
487
|
+
access: open
|
|
488
|
+
num_parameters: 900000000
|
|
489
|
+
release_date: 2023-04-28
|
|
490
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
491
|
+
|
|
492
|
+
- name: DeepFloyd/IF-I-XL-v1.0
|
|
493
|
+
display_name: DeepFloyd IF X-Large (4.3B)
|
|
494
|
+
description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
|
|
495
|
+
creator_organization_name: DeepFloyd
|
|
496
|
+
access: open
|
|
497
|
+
num_parameters: 4300000000
|
|
498
|
+
release_date: 2023-04-28
|
|
499
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
381
500
|
|
|
382
501
|
|
|
383
502
|
# Databricks
|
|
@@ -408,6 +527,14 @@ models:
|
|
|
408
527
|
release_date: 2023-04-12
|
|
409
528
|
tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
410
529
|
|
|
530
|
+
- name: databricks/dbrx-instruct
|
|
531
|
+
display_name: DBRX Instruct
|
|
532
|
+
description: DBRX is a large language model with a fine-grained mixture-of-experts (MoE) architecture that uses 16 experts and chooses 4. It has 132B total parameters, of which 36B parameters are active on any input. ([blog post](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm))
|
|
533
|
+
creator_organization_name: Databricks
|
|
534
|
+
access: open
|
|
535
|
+
num_parameters: 132000000000
|
|
536
|
+
release_date: 2024-03-27
|
|
537
|
+
tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
411
538
|
|
|
412
539
|
|
|
413
540
|
# DeepMind
|
|
@@ -430,6 +557,16 @@ models:
|
|
|
430
557
|
tags: [] # TODO: add tags
|
|
431
558
|
|
|
432
559
|
|
|
560
|
+
# Deepseek
|
|
561
|
+
- name: deepseek-ai/deepseek-llm-67b-chat
|
|
562
|
+
display_name: DeepSeek LLM Chat (67B)
|
|
563
|
+
description: DeepSeek LLM Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
|
|
564
|
+
creator_organization_name: DeepSeek
|
|
565
|
+
access: open
|
|
566
|
+
num_parameters: 67000000000
|
|
567
|
+
release_date: 2024-01-05
|
|
568
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
569
|
+
|
|
433
570
|
|
|
434
571
|
# EleutherAI
|
|
435
572
|
- name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
|
|
@@ -526,6 +663,85 @@ models:
|
|
|
526
663
|
release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
|
|
527
664
|
tags: [] # TODO: add tags
|
|
528
665
|
|
|
666
|
+
# Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
|
|
667
|
+
- name: google/gemini-pro
|
|
668
|
+
display_name: Gemini Pro
|
|
669
|
+
description: Gemini Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
|
|
670
|
+
creator_organization_name: Google
|
|
671
|
+
access: limited
|
|
672
|
+
release_date: 2023-12-13
|
|
673
|
+
tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
674
|
+
|
|
675
|
+
- name: google/gemini-1.0-pro-001
|
|
676
|
+
display_name: Gemini 1.0 Pro
|
|
677
|
+
description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
|
|
678
|
+
creator_organization_name: Google
|
|
679
|
+
access: limited
|
|
680
|
+
release_date: 2023-12-13
|
|
681
|
+
tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
682
|
+
|
|
683
|
+
# Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
|
|
684
|
+
- name: google/gemini-pro-vision
|
|
685
|
+
display_name: Gemini Pro Vision
|
|
686
|
+
description: Gemini Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
|
|
687
|
+
creator_organization_name: Google
|
|
688
|
+
access: limited
|
|
689
|
+
release_date: 2023-12-13
|
|
690
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG]
|
|
691
|
+
|
|
692
|
+
- name: google/gemini-1.0-pro-vision-001
|
|
693
|
+
display_name: Gemini 1.0 Pro Vision
|
|
694
|
+
description: Gemini 1.0 Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
|
|
695
|
+
creator_organization_name: Google
|
|
696
|
+
access: limited
|
|
697
|
+
release_date: 2023-12-13
|
|
698
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
699
|
+
|
|
700
|
+
- name: google/gemini-1.5-pro-preview-0409
|
|
701
|
+
display_name: Gemini 1.5 Pro (0409 preview)
|
|
702
|
+
description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
|
|
703
|
+
creator_organization_name: Google
|
|
704
|
+
access: limited
|
|
705
|
+
release_date: 2024-04-10
|
|
706
|
+
tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
707
|
+
|
|
708
|
+
- name: google/gemma-2b
|
|
709
|
+
display_name: Gemma (2B)
|
|
710
|
+
# TODO: Fill in Gemma description.
|
|
711
|
+
description: TBD
|
|
712
|
+
creator_organization_name: Google
|
|
713
|
+
access: open
|
|
714
|
+
release_date: 2024-02-21
|
|
715
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
716
|
+
|
|
717
|
+
- name: google/gemma-2b-it
|
|
718
|
+
display_name: Gemma Instruct (2B)
|
|
719
|
+
# TODO: Fill in Gemma description.
|
|
720
|
+
description: TBD
|
|
721
|
+
creator_organization_name: Google
|
|
722
|
+
access: open
|
|
723
|
+
release_date: 2024-02-21
|
|
724
|
+
tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
725
|
+
|
|
726
|
+
- name: google/gemma-7b
|
|
727
|
+
display_name: Gemma (7B)
|
|
728
|
+
# TODO: Fill in Gemma description.
|
|
729
|
+
description: TBD
|
|
730
|
+
creator_organization_name: Google
|
|
731
|
+
access: open
|
|
732
|
+
release_date: 2024-02-21
|
|
733
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
734
|
+
|
|
735
|
+
- name: google/gemma-7b-it
|
|
736
|
+
display_name: Gemma Instruct (7B)
|
|
737
|
+
# TODO: Fill in Gemma description.
|
|
738
|
+
description: TBD
|
|
739
|
+
creator_organization_name: Google
|
|
740
|
+
access: open
|
|
741
|
+
release_date: 2024-02-21
|
|
742
|
+
# TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
|
|
743
|
+
tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
|
|
744
|
+
|
|
529
745
|
- name: google/text-bison@001
|
|
530
746
|
display_name: PaLM-2 (Bison)
|
|
531
747
|
description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
|
|
@@ -534,6 +750,14 @@ models:
|
|
|
534
750
|
release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
|
|
535
751
|
tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
536
752
|
|
|
753
|
+
- name: google/text-bison@002
|
|
754
|
+
display_name: PaLM-2 (Bison)
|
|
755
|
+
description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
|
|
756
|
+
creator_organization_name: Google
|
|
757
|
+
access: limited
|
|
758
|
+
release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
|
|
759
|
+
tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
760
|
+
|
|
537
761
|
- name: google/text-bison-32k
|
|
538
762
|
display_name: PaLM-2 (Bison)
|
|
539
763
|
description: The best value PaLM model with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
|
|
@@ -558,6 +782,14 @@ models:
|
|
|
558
782
|
release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
|
|
559
783
|
tags: [CODE_MODEL_TAG]
|
|
560
784
|
|
|
785
|
+
- name: google/code-bison@002
|
|
786
|
+
display_name: Codey PaLM-2 (Bison)
|
|
787
|
+
description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
|
|
788
|
+
creator_organization_name: Google
|
|
789
|
+
access: limited
|
|
790
|
+
release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
|
|
791
|
+
tags: [CODE_MODEL_TAG]
|
|
792
|
+
|
|
561
793
|
- name: google/code-bison-32k
|
|
562
794
|
display_name: Codey PaLM-2 (Bison)
|
|
563
795
|
description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
|
|
@@ -568,55 +800,233 @@ models:
|
|
|
568
800
|
|
|
569
801
|
|
|
570
802
|
|
|
571
|
-
#
|
|
572
|
-
- name:
|
|
573
|
-
display_name:
|
|
574
|
-
description:
|
|
575
|
-
creator_organization_name:
|
|
803
|
+
# HuggingFace
|
|
804
|
+
- name: HuggingFaceM4/idefics2-8b
|
|
805
|
+
display_name: IDEFICS 2 (8B)
|
|
806
|
+
description: IDEFICS 2 (8B parameters) is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. ([blog](https://huggingface.co/blog/idefics2)).
|
|
807
|
+
creator_organization_name: HuggingFace
|
|
576
808
|
access: open
|
|
577
|
-
num_parameters:
|
|
578
|
-
release_date:
|
|
579
|
-
tags: [
|
|
809
|
+
num_parameters: 8000000000
|
|
810
|
+
release_date: 2024-04-15
|
|
811
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
580
812
|
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
# HuggingFace
|
|
584
813
|
- name: HuggingFaceM4/idefics-9b
|
|
585
814
|
display_name: IDEFICS (9B)
|
|
586
|
-
description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo
|
|
815
|
+
description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
|
|
587
816
|
creator_organization_name: HuggingFace
|
|
588
817
|
access: open
|
|
589
818
|
num_parameters: 9000000000
|
|
590
819
|
release_date: 2023-08-22
|
|
591
|
-
tags: [VISION_LANGUAGE_MODEL_TAG]
|
|
820
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
592
821
|
|
|
593
822
|
- name: HuggingFaceM4/idefics-9b-instruct
|
|
594
|
-
display_name: IDEFICS
|
|
595
|
-
description: IDEFICS
|
|
823
|
+
display_name: IDEFICS-instruct (9B)
|
|
824
|
+
description: IDEFICS-instruct (9B parameters) is the instruction-tuned version of IDEFICS 9B ([blog](https://huggingface.co/blog/idefics)).
|
|
596
825
|
creator_organization_name: HuggingFace
|
|
597
826
|
access: open
|
|
598
827
|
num_parameters: 9000000000
|
|
599
828
|
release_date: 2023-08-22
|
|
600
|
-
tags: [VISION_LANGUAGE_MODEL_TAG]
|
|
829
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
601
830
|
|
|
   - name: HuggingFaceM4/idefics-80b
     display_name: IDEFICS (80B)
-    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo
+    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 80000000000
     release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   - name: HuggingFaceM4/idefics-80b-instruct
-    display_name: IDEFICS
-    description: IDEFICS
+    display_name: IDEFICS-instruct (80B)
+    description: IDEFICS-instruct (80B parameters) is the instruction-tuned version of IDEFICS 80B ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 80000000000
     release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  ## Text-to-Image Diffusion Models
+  - name: huggingface/dreamlike-diffusion-v1-0
+    display_name: Dreamlike Diffusion v1.0 (1B)
+    description: Dreamlike Diffusion v1.0 is Stable Diffusion v1.5 fine tuned on high quality art ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0))
+    creator_organization_name: dreamlike.art
+    access: open
+    num_parameters: 1000000000
+    release_date: 2023-03-08
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/dreamlike-photoreal-v2-0
+    display_name: Dreamlike Photoreal v2.0 (1B)
+    description: Dreamlike Photoreal v2.0 is a photorealistic model based on Stable Diffusion v1.5 ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0))
+    creator_organization_name: dreamlike.art
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-23
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/openjourney-v1-0
+    display_name: Openjourney (1B)
+    description: Openjourney is an open-source Stable Diffusion model fine-tuned on Midjourney images ([HuggingFace model card](https://huggingface.co/prompthero/openjourney))
+    creator_organization_name: PromptHero
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-01 # TODO: get the exact date
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/openjourney-v2-0
+    display_name: Openjourney v2 (1B)
+    description: Openjourney v2 is an open-source Stable Diffusion model fine-tuned on Midjourney images. Openjourney v2 is now referred to as Openjourney v4 in Hugging Face ([HuggingFace model card](https://huggingface.co/prompthero/openjourney-v4)).
+    creator_organization_name: PromptHero
+    access: open
+    num_parameters: 1000000000
+    release_date: 2023-01-01 # TODO: get the exact date
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/promptist-stable-diffusion-v1-4
+    display_name: Promptist + Stable Diffusion v1.4 (1B)
+    description: Trained with human preferences, Promptist optimizes user input into model-preferred prompts for Stable Diffusion v1.4 ([paper](https://arxiv.org/abs/2212.09611))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-12-19
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/redshift-diffusion
+    display_name: Redshift Diffusion (1B)
+    description: Redshift Diffusion is an open source Stable Diffusion model fine tuned on high resolution 3D artworks ([HuggingFace model card](https://huggingface.co/nitrosocke/redshift-diffusion))
+    creator_organization_name: nitrosocke
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-29
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-weak
+    display_name: Safe Stable Diffusion weak (1B)
+    description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-medium
+    display_name: Safe Stable Diffusion medium (1B)
+    description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-strong
+    display_name: Safe Stable Diffusion strong (1B)
+    description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-max
+    display_name: Safe Stable Diffusion max (1B)
+    description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v1-4
+    display_name: Stable Diffusion v1.4 (1B)
+    description: Stable Diffusion v1.4 is a latent text-to-image diffusion model capable of generating photorealistic images given any text input ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Ludwig Maximilian University of Munich CompVis
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-08-01
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v1-5
+    display_name: Stable Diffusion v1.5 (1B)
+    description: The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned on 595k steps at resolution 512x512 on laion-aesthetics v2 5+ and 10% dropping of the text-conditioning to improve classifier-free guidance sampling ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Runway
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-10-20
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v2-base
+    display_name: Stable Diffusion v2 base (1B)
+    description: The model is trained from scratch 550k steps at resolution 256x256 on a subset of LAION-5B filtered for explicit pornographic material, using the LAION-NSFW classifier with punsafe=0.1 and an aesthetic score greater than 4.5. Then it is further trained for 850k steps at resolution 512x512 on the same dataset on images with resolution greater than 512x512 ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-23
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v2-1-base
+    display_name: Stable Diffusion v2.1 base (1B)
+    description: This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base with 220k extra steps taken, with punsafe=0.98 on the same dataset ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-23
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/vintedois-diffusion-v0-1
+    display_name: Vintedois (22h) Diffusion model v0.1 (1B)
+    description: Vintedois (22h) Diffusion model v0.1 is Stable Diffusion v1.5 that was finetuned on a large amount of high quality images with simple prompts to generate beautiful images without a lot of prompt engineering ([HuggingFace model card](https://huggingface.co/22h/vintedois-diffusion-v0-1))
+    creator_organization_name: 22 Hours
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-12-27
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
 
+  - name: segmind/Segmind-Vega
+    display_name: Segmind Stable Diffusion (0.74B)
+    description: The Segmind-Vega Model is a distilled version of the Stable Diffusion XL (SDXL), offering a remarkable 70% reduction in size and an impressive 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/Segmind-Vega))
+    creator_organization_name: Segmind
+    access: open
+    num_parameters: 740000000
+    release_date: 2023-12-01
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: segmind/SSD-1B
+    display_name: Segmind Stable Diffusion (1B)
+    description: The Segmind Stable Diffusion Model (SSD-1B) is a distilled 50% smaller version of the Stable Diffusion XL (SDXL), offering a 60% speedup while maintaining high-quality text-to-image generation capabilities. It has been trained on diverse datasets, including Grit and Midjourney scrape data, to enhance its ability to create a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/SSD-1B))
+    creator_organization_name: Segmind
+    access: open
+    num_parameters: 1000000000
+    release_date: 2023-10-20
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: stabilityai/stable-diffusion-xl-base-1.0
+    display_name: Stable Diffusion XL
+    description: Stable Diffusion XL (SDXL) consists of an ensemble of experts pipeline for latent diffusion. ([HuggingFace model card](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0))
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 6600000000
+    release_date: 2023-07-26
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  # Kakao
+  - name: kakaobrain/mindall-e
+    display_name: minDALL-E (1.3B)
+    description: minDALL-E, named after minGPT, is an autoregressive text-to-image generation model trained on 14 million image-text pairs ([code](https://github.com/kakaobrain/minDALL-E))
+    creator_organization_name: Kakao
+    access: open
+    num_parameters: 1300000000
+    release_date: 2021-12-13
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  # Lexica
+  - name: lexica/search-stable-diffusion-1.5
+    display_name: Lexica Search with Stable Diffusion v1.5 (1B)
+    description: Retrieves Stable Diffusion v1.5 images Lexica users generated ([docs](https://lexica.art/docs)).
+    creator_organization_name: Lexica
+    access: open
+    release_date: 2023-01-01
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
 
 
   # Lightning AI
@@ -791,6 +1201,41 @@ models:
     # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: meta/llama-3-8b
+    display_name: Llama 3 (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-70b
+    display_name: Llama 3 (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-8b-chat
+    display_name: Llama 3 Chat (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-70b-chat
+    display_name: Llama 3 Chat (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 
   # Microsoft/NVIDIA
@@ -801,7 +1246,7 @@ models:
     access: closed
     num_parameters: 530000000000
     release_date: 2022-01-28
-    tags: [
+    tags: [] # deprecated text model
 
   - name: microsoft/TNLGv2_7B
     display_name: TNLG v2 (6.7B)
@@ -810,7 +1255,79 @@ models:
     access: closed
     num_parameters: 6700000000
     release_date: 2022-01-28
-    tags: [
+    tags: [] # deprecated text model
+
+  - name: microsoft/llava-1.5-7b-hf
+    display_name: LLaVA 1.5 (7B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-10-05
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: microsoft/llava-1.5-13b-hf
+    display_name: LLaVA 1.5 (13B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 13000000000
+    release_date: 2023-10-05
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-vicuna-7b-hf
+    display_name: LLaVA 1.6 (7B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-vicuna-13b-hf
+    display_name: LLaVA 1.6 (13B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-mistral-7b-hf
+    display_name: LLaVA 1.6 + Mistral (7B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-01-01
+    tags: [ VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG ]
+
+  - name: uw-madison/llava-v1.6-34b-hf
+    display_name: LLaVA + Nous-Hermes-2-Yi-34B (34B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 34000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+    display_name: OpenFlamingo (9B)
+    description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model ([paper](https://arxiv.org/abs/2308.01390)).
+    creator_organization_name: OpenFlamingo
+    access: open
+    num_parameters: 9000000000
+    release_date: 2023-08-02
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPEN_FLAMINGO_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: microsoft/phi-2
+    display_name: Phi-2
+    description: Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 13000000000
+    release_date: 2023-10-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
 
 
@@ -831,6 +1348,52 @@ models:
     num_parameters: 34000000000
     release_date: 2023-11-02
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+  - name: 01-ai/yi-6b-chat
+    display_name: Yi Chat (6B)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI.
+    creator_organization_name: 01.AI
+    access: open
+    num_parameters: 6000000000
+    release_date: 2023-11-23
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+  - name: 01-ai/yi-34b-chat
+    display_name: Yi Chat (34B)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI.
+    creator_organization_name: 01.AI
+    access: open
+    num_parameters: 34000000000
+    release_date: 2023-11-23
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  # Allen Institute for AI
+  # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
+  - name: allenai/olmo-7b
+    display_name: OLMo (7B)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-01
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: allenai/olmo-7b-twin-2t
+    display_name: OLMo (7B Twin 2T)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-01
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: allenai/olmo-7b-instruct
+    display_name: OLMo (7B Instruct)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-01
+    # TODO: Add instruct tag.
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
 
   # Mistral AI
@@ -841,17 +1404,81 @@ models:
     access: open
     num_parameters: 7300000000
     release_date: 2023-09-27
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: mistralai/mixtral-8x7b-32kseqlen
     display_name: Mixtral (8x7B 32K seqlen)
     description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
     creator_organization_name: Mistral AI
     access: open
-    num_parameters:
+    num_parameters: 46700000000
     release_date: 2023-12-08
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: mistralai/mixtral-8x7b-instruct-v0.1
+    display_name: Mixtral (8x7B Instruct)
+    description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following.
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 46700000000
+    # Blog post: https://mistral.ai/news/mixtral-of-experts/
+    release_date: 2023-12-11
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
+
+  - name: mistralai/mixtral-8x22b
+    display_name: Mixtral (8x22B)
+    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 176000000000
+    release_date: 2024-04-10
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mixtral-8x22b-instruct-v0.1
+    display_name: Mixtral Instruct (8x22B)
+    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 176000000000
+    release_date: 2024-04-10
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/bakLlava-v1-hf
+    display_name: BakLLaVA v1 (7B)
+    description: BakLLaVA v1 is a Mistral 7B base augmented with the LLaVA 1.5 architecture. ([blog](https://huggingface.co/llava-hf/bakLlava-v1-hf))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-10-16
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: mistralai/mistral-small-2402
+    display_name: Mistral Small (2402)
+    # TODO: Fill in description
+    description: TBD
+    creator_organization_name: Mistral AI
+    access: limited
+    # Blog post: https://mistral.ai/news/mistral-large/
+    release_date: 2023-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
+
+  - name: mistralai/mistral-medium-2312
+    display_name: Mistral Medium (2312)
+    description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2023-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
+
+  - name: mistralai/mistral-large-2402
+    display_name: Mistral Large (2402)
+    # TODO: Fill in description
+    description: TBD
+    creator_organization_name: Mistral AI
+    access: limited
+    # Blog post: https://mistral.ai/news/mistral-large/
+    release_date: 2023-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
 
 
   # MosaicML
@@ -950,6 +1577,23 @@ models:
 
   ## GPT 3 Models
   # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
+
+  - name: openai/davinci-002
+    display_name: davinci-002
+    description: Replacement for the GPT-3 curie and davinci base models.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: openai/babbage-002
+    display_name: babbage-002
+    description: Replacement for the GPT-3 ada and babbage base models.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
 
   - name: openai/davinci # DEPRECATED
@@ -1048,9 +1692,17 @@ models:
   ## GPT 3.5 Turbo Models
   # ChatGPT: https://openai.com/blog/chatgpt
 
+  - name: openai/gpt-3.5-turbo-instruct
+    display_name: GPT-3.5 Turbo Instruct
+    description: Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-09-18
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: openai/gpt-3.5-turbo-0301
     display_name: GPT-3.5 Turbo (0301)
-    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-03-01
@@ -1058,17 +1710,36 @@ models:
 
   - name: openai/gpt-3.5-turbo-0613
     display_name: GPT-3.5 Turbo (0613)
-    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-06-13
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-1106
+    display_name: GPT-3.5 Turbo (1106)
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-11-06.
+    creator_organization_name: OpenAI
+    access: limited
+    # Actual release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-0125
+    display_name: gpt-3.5-turbo-0125
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
     creator_organization_name: OpenAI
     access: limited
+    # Release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    # The actual release date is unclear - it was described as "next week".
     release_date: 2023-06-13
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-  # Claimed length is 16,384; we round down to 16,000 for the same reasons as explained
-  # in the openai/gpt-3.5-turbo-0613 comment
   - name: openai/gpt-3.5-turbo-16k-0613
     display_name: gpt-3.5-turbo-16k-0613
-    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-06-13
@@ -1079,7 +1750,7 @@ models:
 
   - name: openai/gpt-4-1106-preview
     display_name: GPT-4 Turbo (1106 preview)
-    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from
+    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2023-11-06.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-11-06
@@ -1087,7 +1758,7 @@ models:
 
   - name: openai/gpt-4-0314
     display_name: GPT-4 (0314)
-    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from
+    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-03-14.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-03-14
@@ -1117,6 +1788,40 @@ models:
     release_date: 2023-06-13
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: openai/gpt-4-0125-preview
+    display_name: GPT-4 Turbo (0125 preview)
+    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2024-01-25. This snapshot is intended to reduce cases of “laziness” where the model doesn’t complete a task.
+    creator_organization_name: OpenAI
+    access: limited
+    # Actual release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-turbo-2024-04-09
+    display_name: GPT-4 Turbo (2024-04-09)
+    description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-vision-preview
+    # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model points to gpt-4-1106-vision-preview.
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: openai/gpt-4-1106-vision-preview
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   ## Codex Models
   # DEPRECATED: Codex models have been shut down on March 23 2023.
@@ -1198,7 +1903,109 @@ models:
     release_date: 2022-12-15 # Blog post date
     tags: [TEXT_SIMILARITY_MODEL_TAG]
 
+  # Text-to-image models
+  - name: openai/dall-e-2
+    display_name: DALL-E 2 (3.5B)
+    description: DALL-E 2 is an encoder-decoder-based latent diffusion model trained on large-scale paired text-image datasets. The model is available via the OpenAI API ([paper](https://arxiv.org/abs/2204.06125)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 3500000000
+    release_date: 2022-04-13
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
 
+  - name: openai/dall-e-3
+    display_name: DALL-E 3
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-natural
+    display_name: DALL-E 3 (natural style)
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-hd
+    display_name: DALL-E 3 HD
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-hd-natural
+    display_name: DALL-E 3 HD (natural style)
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  # Qwen
+
1954
|
+
- name: qwen/qwen-7b
|
|
1955
|
+
display_name: Qwen
|
|
1956
|
+
description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
|
|
1957
|
+
creator_organization_name: Qwen
|
|
1958
|
+
access: open
|
|
1959
|
+
release_date: 2024-02-05
|
|
1960
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1961
|
+
|
|
1962
|
+
- name: qwen/qwen1.5-7b
|
|
1963
|
+
display_name: Qwen1.5 (7B)
|
|
1964
|
+
description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
|
|
1965
|
+
creator_organization_name: Qwen
|
|
1966
|
+
access: open
|
|
1967
|
+
release_date: 2024-02-05
|
|
1968
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1969
|
+
|
|
1970
|
+
- name: qwen/qwen1.5-14b
|
|
1971
|
+
display_name: Qwen1.5 (14B)
|
|
1972
|
+
description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
|
|
1973
|
+
creator_organization_name: Qwen
|
|
1974
|
+
access: open
|
|
1975
|
+
release_date: 2024-02-05
|
|
1976
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1977
|
+
|
|
1978
|
+
- name: qwen/qwen1.5-32b
|
|
1979
|
+
display_name: Qwen1.5 (32B)
|
|
1980
|
+
description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
|
|
1981
|
+
creator_organization_name: Qwen
|
|
1982
|
+
access: open
|
|
1983
|
+
release_date: 2024-02-05
|
|
1984
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1985
|
+
|
|
1986
|
+
- name: qwen/qwen1.5-72b
|
|
1987
|
+
display_name: Qwen1.5 (72B)
|
|
1988
|
+
description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
|
|
1989
|
+
creator_organization_name: Qwen
|
|
1990
|
+
access: open
|
|
1991
|
+
release_date: 2024-02-05
|
|
1992
|
+
tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
|
|
1993
|
+
|
|
1994
|
+
- name: qwen/qwen-vl
|
|
1995
|
+
display_name: Qwen-VL
|
|
1996
|
+
description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
|
|
1997
|
+
creator_organization_name: Alibaba Cloud
|
|
1998
|
+
access: open
|
|
1999
|
+
release_date: 2023-08-24
|
|
2000
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
2001
|
+
|
|
2002
|
+
- name: qwen/qwen-vl-chat
|
|
2003
|
+
display_name: Qwen-VL Chat
|
|
2004
|
+
description: Chat version of Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
|
|
2005
|
+
creator_organization_name: Alibaba Cloud
|
|
2006
|
+
access: open
|
|
2007
|
+
release_date: 2023-08-24
|
|
2008
|
+
tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
|
|
1202
2009
|
|
|
1203
2010
|
# Salesforce
|
|
1204
2011
|
- name: salesforce/codegen # NOT SUPPORTED
|
|
@@ -1211,6 +2018,16 @@ models:
     tags: [] # TODO: add tags
 
 
+  # Snowflake
+  - name: snowflake/snowflake-arctic-instruct
+    display_name: Arctic Instruct
+    description: Arctic combines a 10B dense transformer model with a residual 128x3.66B MoE MLP resulting in 480B total and 17B active parameters chosen using a top-2 gating.
+    creator_organization_name: Snowflake
+    access: open
+    num_parameters: 482000000000
+    release_date: 2024-04-24
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 
   # Stability AI
   - name: stabilityai/stablelm-base-alpha-3b
@@ -1351,6 +2168,16 @@ models:
 
 
   # Tsinghua
+
+  - name: thudm/cogview2
+    display_name: CogView2 (6B)
+    description: CogView2 is a hierarchical transformer (6B-9B-9B parameters) for text-to-image generation that supports both English and Chinese input text ([paper](https://arxiv.org/abs/2105.13290))
+    creator_organization_name: Tsinghua
+    access: open
+    num_parameters: 6000000000
+    release_date: 2022-06-15
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
   - name: tsinghua/glm
     display_name: GLM (130B)
     description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)).