crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +134 -31
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +31 -3
- helm/benchmark/adaptation/adapters/adapter.py +2 -2
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/gender_perturbation.py +1 -1
- helm/benchmark/augmentations/perturbation.py +8 -2
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/config_registry.py +7 -1
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +20 -7
- helm/benchmark/metrics/basic_metrics.py +169 -664
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +4 -110
- helm/benchmark/metrics/dry_run_metrics.py +2 -2
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +89 -0
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +2 -2
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +2 -2
- helm/benchmark/metrics/test_classification_metrics.py +8 -5
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +74 -0
- helm/benchmark/model_metadata_registry.py +36 -0
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +26 -10
- helm/benchmark/presentation/schema.py +15 -40
- helm/benchmark/presentation/summarize.py +119 -79
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +1 -2
- helm/benchmark/presentation/test_summarize.py +3 -3
- helm/benchmark/run.py +54 -26
- helm/benchmark/run_expander.py +214 -16
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/run_specs/classic_run_specs.py +1510 -0
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +51 -57
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/code_scenario.py +1 -0
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +1 -1
- helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
- helm/benchmark/scenarios/scenario.py +4 -0
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +6 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -2
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
- helm/benchmark/server.py +24 -1
- helm/benchmark/slurm_runner.py +70 -49
- helm/benchmark/static/benchmarking.js +1 -1
- helm/benchmark/static/schema_classic.yaml +258 -1066
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +2 -227
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +14 -16
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -44
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +4 -1
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +3 -9
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +203 -18
- helm/{proxy/clients → clients}/auto_client.py +59 -31
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +65 -7
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +4 -11
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +5 -5
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +5 -7
- helm/{proxy/clients → clients}/huggingface_client.py +43 -64
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
- helm/{proxy/clients → clients}/megatron_client.py +5 -5
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +6 -8
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +13 -15
- helm/clients/test_client.py +100 -0
- helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +20 -8
- helm/{proxy/clients → clients}/together_client.py +12 -72
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +53 -48
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +16 -4
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +1 -1
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +35 -4
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +3 -3
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/request.py +15 -17
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +1 -1
- helm/config/model_deployments.yaml +1069 -546
- helm/config/model_metadata.yaml +753 -31
- helm/config/tokenizer_configs.yaml +142 -43
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +8 -6
- helm/proxy/example_queries.py +29 -17
- helm/proxy/server.py +70 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +96 -16
- helm/proxy/services/service.py +30 -0
- helm/proxy/services/test_remote_service.py +4 -3
- helm/proxy/services/test_service.py +0 -12
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.4.0.dist-info/RECORD +0 -397
- helm/benchmark/run_specs.py +0 -2762
- helm/benchmark/test_model_properties.py +0 -1570
- helm/benchmark/vlm_run_specs.py +0 -97
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/huggingface_window_service.py +0 -60
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/common/cache_utils.py +0 -14
- helm/proxy/clients/aleph_alpha_client.py +0 -95
- helm/proxy/clients/goose_ai_client.py +0 -99
- helm/proxy/clients/microsoft_client.py +0 -180
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/simple_client.py +0 -60
- helm/proxy/clients/test_client.py +0 -49
- helm/proxy/clients/vertexai_client.py +0 -115
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
- helm/proxy/token_counters/test_openai_token_counter.py +0 -81
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/config/model_metadata.yaml
CHANGED
@@ -19,6 +19,17 @@ models:
     release_date: 2023-01-01
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+  # Adobe
+  - name: adobe/giga-gan
+    display_name: GigaGAN (1B)
+    description: GigaGAN is a GAN model that produces high-quality images extremely quickly. The model was trained on text and image pairs from LAION2B-en and COYO-700M. ([paper](https://arxiv.org/abs/2303.05511)).
+    creator_organization_name: Adobe
+    access: limited
+    num_parameters: 1000000000
+    release_date: 2023-06-22
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+
   # AI21 Labs
   - name: ai21/j1-jumbo # DEPRECATED
     display_name: J1-Jumbo v1 (178B)
@@ -102,7 +113,7 @@ models:
     # TODO: get exact release date
     release_date: 2022-01-01
     # Does not support echo
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

   - name: AlephAlpha/luminous-extended
     display_name: Luminous Extended (30B)
@@ -112,7 +123,7 @@ models:
     num_parameters: 30000000000
     release_date: 2022-01-01
     # Does not support echo
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

   - name: AlephAlpha/luminous-supreme
     display_name: Luminous Supreme (70B)
@@ -122,7 +133,7 @@ models:
     num_parameters: 70000000000
     release_date: 2022-01-01
     # Does not support echo.
-    #
+    # Currently, only Luminous-extended and Luminous-base support multimodal inputs
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

   # TODO: Uncomment when luminous-world is released.
@@ -135,8 +146,46 @@ models:
   #   release_date: TBD
   #   # Does not support echo.
   #   tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: AlephAlpha/m-vader
+    display_name: MultiFusion (13B)
+    description: MultiFusion is a multimodal, multilingual diffusion model that extend the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296))
+    creator_organization_name: Aleph Alpha
+    access: limited
+    num_parameters: 13000000000
+    release_date: 2023-05-24
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]


+  # Amazon
+  # References for Amazon Titan models:
+  # - https://aws.amazon.com/bedrock/titan/
+  # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
+  # - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
+  - name: amazon/titan-text-lite-v1
+    display_name: Amazon Titan Text Lite
+    description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
+    creator_organization_name: Amazon
+    access: limited
+    release_date: 2023-11-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: amazon/titan-tg1-large
+    display_name: Amazon Titan Large
+    description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
+    creator_organization_name: Amazon
+    access: limited
+    release_date: 2023-11-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: amazon/titan-text-express-v1
+    display_name: Amazon Titan Text Express
+    description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
+    creator_organization_name: Amazon
+    access: limited
+    release_date: 2023-11-29
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+

   # Anthropic
   - name: anthropic/claude-v1.3
@@ -180,6 +229,30 @@ models:
     release_date: 2023-11-21
     tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: anthropic/claude-3-haiku-20240307
+    display_name: Claude 3 Haiku (20240307)
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-sonnet-20240229
+    display_name: Claude 3 Sonnet (20240229)
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: anthropic/claude-3-opus-20240229
+    display_name: Claude 3 Opus (20240229)
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    creator_organization_name: Anthropic
+    access: limited
+    release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
+    tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # DEPRECATED: Please do not use.
   - name: anthropic/stanford-online-all-v4-s3
     display_name: Anthropic-LM v4-s3 (52B)
@@ -378,6 +451,52 @@ models:
     release_date: 2023-09-29
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  # Craiyon
+  - name: craiyon/dalle-mini
+    display_name: DALL-E mini (0.4B)
+    description: DALL-E mini is an open-source text-to-image model that attempt to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
+    creator_organization_name: Craiyon
+    access: open
+    num_parameters: 400000000
+    release_date: 2022-04-21
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: craiyon/dalle-mega
+    display_name: DALL-E mega (2.6B)
+    description: DALL-E mega is an open-source text-to-image model that attempt to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
+    creator_organization_name: Craiyon
+    access: open
+    num_parameters: 2600000000
+    release_date: 2022-04-21
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  # DeepFloyd
+  - name: DeepFloyd/IF-I-M-v1.0
+    display_name: DeepFloyd IF Medium (0.4B)
+    description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+    creator_organization_name: DeepFloyd
+    access: open
+    num_parameters: 400000000
+    release_date: 2023-04-28
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: DeepFloyd/IF-I-L-v1.0
+    display_name: DeepFloyd IF Large (0.9B)
+    description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+    creator_organization_name: DeepFloyd
+    access: open
+    num_parameters: 900000000
+    release_date: 2023-04-28
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: DeepFloyd/IF-I-XL-v1.0
+    display_name: DeepFloyd IF X-Large (4.3B)
+    description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
+    creator_organization_name: DeepFloyd
+    access: open
+    num_parameters: 4300000000
+    release_date: 2023-04-28
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]


   # Databricks
@@ -408,6 +527,14 @@ models:
     release_date: 2023-04-12
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+  - name: databricks/dbrx-instruct
+    display_name: DBRX Instruct
+    description: DBRX is a large language model with a fine-grained mixture-of-experts (MoE) architecture that uses 16 experts and chooses 4. It has 132B total parameters, of which 36B parameters are active on any input. ([blog post](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm))
+    creator_organization_name: Databricks
+    access: open
+    num_parameters: 132000000000
+    release_date: 2024-03-27
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]


   # DeepMind
@@ -430,6 +557,16 @@ models:
     tags: [] # TODO: add tags


+  # Deepseek
+  - name: deepseek-ai/deepseek-llm-67b-chat
+    display_name: DeepSeek Chat (67B)
+    description: DeepSeek Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
+    creator_organization_name: DeepSeek
+    access: open
+    num_parameters: 67000000000
+    release_date: 2024-01-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+

   # EleutherAI
   - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
@@ -526,6 +663,85 @@ models:
     release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
     tags: [] # TODO: add tags

+  # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
+  - name: google/gemini-pro
+    display_name: Gemini Pro
+    description: Gemini Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemini-1.0-pro-001
+    display_name: Gemini 1.0 Pro
+    description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
+  - name: google/gemini-pro-vision
+    display_name: Gemini Pro Vision
+    description: Gemini Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG]
+
+  - name: google/gemini-1.0-pro-vision-001
+    display_name: Gemini 1.0 Pro Vision
+    description: Gemini 1.0 Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemini-1.5-pro-preview-0409
+    display_name: Gemini 1.5 Pro
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-04-10
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-2b
+    display_name: Gemma (2B)
+    # TODO: Fill in Gemma description.
+    description: TBD
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-02-21
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-2b-it
+    display_name: Gemma Instruct (2B)
+    # TODO: Fill in Gemma description.
+    description: TBD
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-02-21
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemma-7b
+    display_name: Gemma (7B)
+    # TODO: Fill in Gemma description.
+    description: TBD
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-02-21
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/gemma-7b-it
+    display_name: Gemma Instruct (7B)
+    # TODO: Fill in Gemma description.
+    description: TBD
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-02-21
+    # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/text-bison@001
     display_name: PaLM-2 (Bison)
     description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -534,6 +750,14 @@ models:
     release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
     tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+  - name: google/text-bison@002
+    display_name: PaLM-2 (Bison)
+    description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
+    tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: google/text-bison-32k
     display_name: PaLM-2 (Bison)
     description: The best value PaLM model with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -558,6 +782,14 @@ models:
     release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
     tags: [CODE_MODEL_TAG]

+  - name: google/code-bison@002
+    display_name: Codey PaLM-2 (Bison)
+    description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
+    tags: [CODE_MODEL_TAG]
+
   - name: google/code-bison-32k
     display_name: Codey PaLM-2 (Bison)
     description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -568,18 +800,6 @@ models:



-  # HazyResearch
-  - name: hazyresearch/h3-2.7b
-    display_name: H3 (2.7B)
-    description: H3 (2.7B parameters) is a decoder-only language model based on state space models ([paper](https://arxiv.org/abs/2212.14052)).
-    creator_organization_name: HazyResearch
-    access: open
-    num_parameters: 2700000000
-    release_date: 2023-01-23
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
-
-
   # HuggingFace
   - name: HuggingFaceM4/idefics-9b
     display_name: IDEFICS (9B)
@@ -588,7 +808,7 @@ models:
     access: open
     num_parameters: 9000000000
     release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

   - name: HuggingFaceM4/idefics-9b-instruct
     display_name: IDEFICS instruct (9B)
@@ -597,7 +817,7 @@ models:
     access: open
     num_parameters: 9000000000
     release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

   - name: HuggingFaceM4/idefics-80b
     display_name: IDEFICS (80B)
@@ -606,7 +826,7 @@ models:
     access: open
     num_parameters: 80000000000
     release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

   - name: HuggingFaceM4/idefics-80b-instruct
     display_name: IDEFICS instruct (80B)
@@ -615,8 +835,189 @@ models:
     access: open
     num_parameters: 80000000000
     release_date: 2023-08-22
-    tags: [VISION_LANGUAGE_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+  ## Text-to-Image Diffusion Models
+  - name: huggingface/dreamlike-diffusion-v1-0
+    display_name: Dreamlike Diffusion v1.0 (1B)
+    description: Dreamlike Diffusion v1.0 is Stable Diffusion v1.5 fine tuned on high quality art ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0))
+    creator_organization_name: dreamlike.art
+    access: open
+    num_parameters: 1000000000
+    release_date: 2023-03-08
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/dreamlike-photoreal-v2-0
+    display_name: Dreamlike Photoreal v2.0 (1B)
+    description: Dreamlike Photoreal v2.0 is a photorealistic model based on Stable Diffusion v1.5 ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0))
+    creator_organization_name: dreamlike.art
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-23
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/openjourney-v1-0
+    display_name: Openjourney (1B)
+    description: Openjourney is an open source Stable Diffusion fine tuned model on Midjourney images ([HuggingFace model card](https://huggingface.co/prompthero/openjourney))
+    creator_organization_name: PromptHero
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-01 # TODO: get the exact date
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/openjourney-v2-0
+    display_name: Openjourney v2 (1B)
+    description: Openjourney v2 is an open source Stable Diffusion fine tuned model on Midjourney images. Openjourney v2 is now referred to as Openjourney v4 in Hugging Face ([HuggingFace model card](https://huggingface.co/prompthero/openjourney-v4)).
+    creator_organization_name: PromptHero
+    access: open
+    num_parameters: 1000000000
+    release_date: 2023-01-01 # TODO: get the exact date
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/promptist-stable-diffusion-v1-4
+    display_name: Promptist + Stable Diffusion v1.4 (1B)
+    description: Trained with human preferences, Promptist optimizes user input into model-preferred prompts for Stable Diffusion v1.4 ([paper](https://arxiv.org/abs/2212.09611))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-12-19
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/redshift-diffusion
+    display_name: Redshift Diffusion (1B)
+    description: Redshift Diffusion is an open source Stable Diffusion model fine tuned on high resolution 3D artworks ([HuggingFace model card](https://huggingface.co/nitrosocke/redshift-diffusion))
+    creator_organization_name: nitrosocke
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-29
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-weak
+    display_name: Safe Stable Diffusion weak (1B)
+    description: Safe Stable Diffusion is an extension to the Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-medium
+    display_name: Safe Stable Diffusion medium (1B)
+    description: Safe Stable Diffusion is an extension to the Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-strong
+    display_name: Safe Stable Diffusion strong (1B)
+    description: Safe Stable Diffusion is an extension to the Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-safe-max
+    display_name: Safe Stable Diffusion max (1B)
+    description: Safe Stable Diffusion is an extension to the Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105))
+    creator_organization_name: TU Darmstadt
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-09
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v1-4
+    display_name: Stable Diffusion v1.4 (1B)
+    description: Stable Diffusion v1.4 is a latent text-to-image diffusion model capable of generating photorealistic images given any text input ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Ludwig Maximilian University of Munich CompVis
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-08-01
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v1-5
+    display_name: Stable Diffusion v1.5 (1B)
+    description: The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned on 595k steps at resolution 512x512 on laion-aesthetics v2 5+ and 10% dropping of the text-conditioning to improve classifier-free guidance sampling ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Runway
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-10-20
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v2-base
+    display_name: Stable Diffusion v2 base (1B)
+    description: The model is trained from scratch 550k steps at resolution 256x256 on a subset of LAION-5B filtered for explicit pornographic material, using the LAION-NSFW classifier with punsafe=0.1 and an aesthetic score greater than 4.5. Then it is further trained for 850k steps at resolution 512x512 on the same dataset on images with resolution greater than 512x512 ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-23
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/stable-diffusion-v2-1-base
+    display_name: Stable Diffusion v2.1 base (1B)
+    description: This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base with 220k extra steps taken, with punsafe=0.98 on the same dataset ([paper](https://arxiv.org/abs/2112.10752))
+    creator_organization_name: Stability AI
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-11-23
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: huggingface/vintedois-diffusion-v0-1
+    display_name: Vintedois (22h) Diffusion model v0.1 (1B)
+    description: Vintedois (22h) Diffusion model v0.1 is Stable Diffusion v1.5 that was finetuned on a large amount of high quality images with simple prompts to generate beautiful images without a lot of prompt engineering ([HuggingFace model card](https://huggingface.co/22h/vintedois-diffusion-v0-1))
+    creator_organization_name: 22 Hours
+    access: open
+    num_parameters: 1000000000
+    release_date: 2022-12-27
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: segmind/Segmind-Vega
+    display_name: Segmind Stable Diffusion (0.74B)
+    description: The Segmind-Vega Model is a distilled version of the Stable Diffusion XL (SDXL), offering a remarkable 70% reduction in size and an impressive 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/Segmind-Vega))
|
|
979
|
+
creator_organization_name: Segmind
|
|
980
|
+
access: open
|
|
981
|
+
num_parameters: 740000000
|
|
982
|
+
release_date: 2023-12-01
|
|
983
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
984
|
+
|
|
985
|
+
- name: segmind/SSD-1B
|
|
986
|
+
display_name: Segmind Stable Diffusion (1B)
|
|
987
|
+
description: The Segmind Stable Diffusion Model (SSD-1B) is a distilled 50% smaller version of the Stable Diffusion XL (SDXL), offering a 60% speedup while maintaining high-quality text-to-image generation capabilities. It has been trained on diverse datasets, including Grit and Midjourney scrape data, to enhance its ability to create a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/SSD-1B))
|
|
988
|
+
creator_organization_name: Segmind
|
|
989
|
+
access: open
|
|
990
|
+
num_parameters: 1000000000
|
|
991
|
+
release_date: 2023-10-20
|
|
992
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
993
|
+
|
|
994
|
+
- name: stabilityai/stable-diffusion-xl-base-1.0
|
|
995
|
+
display_name: Stable Diffusion XL
|
|
996
|
+
description: Stable Diffusion XL (SDXL) consists of an ensemble of experts pipeline for latent diffusion. ([HuggingFace model card](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0))
|
|
997
|
+
creator_organization_name: Stability AI
|
|
998
|
+
access: open
|
|
999
|
+
num_parameters: 6600000000
|
|
1000
|
+
release_date: 2023-07-26
|
|
1001
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
1002
|
+
|
|
1003
|
+
# Kakao
|
|
1004
|
+
- name: kakaobrain/mindall-e
|
|
1005
|
+
display_name: minDALL-E (1.3B)
|
|
1006
|
+
description: minDALL-E, named after minGPT, is an autoregressive text-to-image generation model trained on 14 million image-text pairs ([code](https://github.com/kakaobrain/minDALL-E))
|
|
1007
|
+
creator_organization_name: Kakao
|
|
1008
|
+
access: open
|
|
1009
|
+
num_parameters: 1300000000
|
|
1010
|
+
release_date: 2021-12-13
|
|
1011
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
1012
|
+
|
|
1013
|
+
# Lexica
|
|
1014
|
+
- name: lexica/search-stable-diffusion-1.5
|
|
1015
|
+
display_name: Lexica Search with Stable Diffusion v1.5 (1B)
|
|
1016
|
+
description: Retrieves Stable Diffusion v1.5 images Lexica users generated ([docs](https://lexica.art/docs)).
|
|
1017
|
+
creator_organization_name: Lexica
|
|
1018
|
+
access: open
|
|
1019
|
+
release_date: 2023-01-01
|
|
1020
|
+
tags: [TEXT_TO_IMAGE_MODEL_TAG]
|
|
620
1021
|
|
|
621
1022
|
|
|
622
1023
|
# Lightning AI
|
|
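All of the text-to-image entries added above follow the same schema: `name`, `display_name`, `description`, `creator_organization_name`, `access`, `release_date`, `tags`, and usually `num_parameters` (the Lexica entry omits it). Below is a minimal sketch, not crfm-helm's actual loader, of how such entries could be parsed and sanity-checked with PyYAML; the `check_entries` helper and `REQUIRED_FIELDS` set are illustrative assumptions.

```python
# Minimal sketch (not HELM's loader): parse a model entry like the ones above
# and verify that the fields shared by every new entry are present.
import yaml  # assumes PyYAML is installed

REQUIRED_FIELDS = {"name", "display_name", "description",
                   "creator_organization_name", "access", "release_date", "tags"}

ENTRY = """
models:
  - name: huggingface/stable-diffusion-v1-4
    display_name: Stable Diffusion v1.4 (1B)
    description: Latent text-to-image diffusion model.
    creator_organization_name: Ludwig Maximilian University of Munich CompVis
    access: open
    num_parameters: 1000000000
    release_date: 2022-08-01
    tags: [TEXT_TO_IMAGE_MODEL_TAG]
"""

def check_entries(text: str) -> None:
    for entry in yaml.safe_load(text)["models"]:
        missing = REQUIRED_FIELDS - entry.keys()
        if missing:
            raise ValueError(f"{entry.get('name')} is missing {sorted(missing)}")
        print(entry["name"], "->", entry["tags"])

check_entries(ENTRY)  # prints: huggingface/stable-diffusion-v1-4 -> ['TEXT_TO_IMAGE_MODEL_TAG']
```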
@@ -791,6 +1192,23 @@ models:
     # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: meta/llama-3-8b
+    display_name: Llama 3 (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-70b
+    display_name: Llama 3 (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
 
   # Microsoft/NVIDIA
@@ -801,7 +1219,7 @@ models:
     access: closed
     num_parameters: 530000000000
     release_date: 2022-01-28
-    tags: [
+    tags: [] # deprecated text model
 
   - name: microsoft/TNLGv2_7B
     display_name: TNLG v2 (6.7B)
@@ -810,7 +1228,44 @@ models:
     access: closed
     num_parameters: 6700000000
     release_date: 2022-01-28
-    tags: [
+    tags: [] # deprecated text model
+
+  - name: microsoft/llava-1.5-7b-hf
+    display_name: LLaVA 1.5 (7B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-10-05
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: microsoft/llava-1.5-13b-hf
+    display_name: LLaVA 1.5 (13B)
+    description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 13000000000
+    release_date: 2023-10-05
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+
+  - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
+    display_name: OpenFlamingo (9B)
+    description: OpenFlamingo is an open-source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and the MPT-7B language model. ([paper](https://arxiv.org/abs/2308.01390))
+    creator_organization_name: OpenFlamingo
+    access: open
+    num_parameters: 9000000000
+    release_date: 2023-08-02
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPEN_FLAMINGO_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: microsoft/phi-2
+    display_name: Phi-2
+    description: Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source consisting of various NLP synthetic texts and filtered websites (for safety and educational value).
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 2700000000
+    release_date: 2023-12-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
 
 
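Note that the two TNLG entries in the hunks above are deprecated by replacing their tag lists with an empty list rather than by deleting the entries. As a hedged sketch (illustrative only, not crfm-helm code), a consumer of this file could treat an empty `tags` list as a retirement marker and group the remaining models by tag:

```python
# Illustrative only: skip entries whose tag list was emptied (deprecated),
# then group the rest by tag. Entry data mirrors the hunks above.
from collections import defaultdict

entries = [
    {"name": "microsoft/TNLGv2_7B", "tags": []},  # deprecated text model
    {"name": "microsoft/llava-1.5-7b-hf",
     "tags": ["VISION_LANGUAGE_MODEL_TAG", "LLAVA_MODEL_TAG", "LIMITED_FUNCTIONALITY_VLM_TAG"]},
    {"name": "microsoft/phi-2",
     "tags": ["TEXT_MODEL_TAG", "LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG"]},
]

by_tag = defaultdict(list)
for entry in entries:
    if not entry["tags"]:  # empty tag list == effectively retired
        continue
    for tag in entry["tags"]:
        by_tag[tag].append(entry["name"])

print(sorted(by_tag["TEXT_MODEL_TAG"]))  # ['microsoft/phi-2']
```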
@@ -833,6 +1288,37 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
 
+  # Allen Institute for AI
+  # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
+  - name: allenai/olmo-7b
+    display_name: OLMo (7B)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-01
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: allenai/olmo-7b-twin-2t
+    display_name: OLMo (7B Twin 2T)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-01
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: allenai/olmo-7b-instruct
+    display_name: OLMo (7B Instruct)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-02-01
+    # TODO: Add instruct tag.
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
   # Mistral AI
   - name: mistralai/mistral-7b-v0.1
     display_name: Mistral v0.1 (7B)
@@ -841,17 +1327,72 @@ models:
     access: open
     num_parameters: 7300000000
     release_date: 2023-09-27
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: mistralai/mixtral-8x7b-32kseqlen
     display_name: Mixtral (8x7B 32K seqlen)
     description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
     creator_organization_name: Mistral AI
     access: open
-    num_parameters:
+    num_parameters: 46700000000
     release_date: 2023-12-08
-    tags: [TEXT_MODEL_TAG,
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mixtral-8x7b-instruct-v0.1
+    display_name: Mixtral (8x7B Instruct)
+    description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimization (DPO) for careful instruction following.
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 46700000000
+    # Blog post: https://mistral.ai/news/mixtral-of-experts/
+    release_date: 2023-12-11
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
+
+  - name: mistralai/mixtral-8x22b
+    display_name: Mixtral (8x22B)
+    description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1777869263778291896)).
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 176000000000
+    release_date: 2024-04-10
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/bakLlava-v1-hf
+    display_name: BakLLaVA v1 (7B)
+    description: BakLLaVA v1 is a Mistral 7B base augmented with the LLaVA 1.5 architecture. ([blog](https://huggingface.co/llava-hf/bakLlava-v1-hf))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-10-16
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: mistralai/mistral-small-2402
+    display_name: Mistral Small (2402)
+    # TODO: Fill in description
+    description: TBD
+    creator_organization_name: Mistral AI
+    access: limited
+    # Blog post: https://mistral.ai/news/mistral-large/
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
+
+  - name: mistralai/mistral-medium-2312
+    display_name: Mistral Medium (2312)
+    description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
+    creator_organization_name: Mistral AI
+    access: limited
+    release_date: 2023-12-11
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
 
+  - name: mistralai/mistral-large-2402
+    display_name: Mistral Large (2402)
+    # TODO: Fill in description
+    description: TBD
+    creator_organization_name: Mistral AI
+    access: limited
+    # Blog post: https://mistral.ai/news/mistral-large/
+    release_date: 2024-02-26
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
 
 
   # MosaicML
@@ -950,6 +1491,23 @@ models:
 
   ## GPT 3 Models
   # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
+
+  - name: openai/davinci-002
+    display_name: davinci-002
+    description: Replacement for the GPT-3 curie and davinci base models.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: openai/babbage-002
+    display_name: babbage-002
+    description: Replacement for the GPT-3 ada and babbage base models.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-08-22
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
 
   - name: openai/davinci # DEPRECATED
@@ -1048,9 +1606,17 @@ models:
   ## GPT 3.5 Turbo Models
   # ChatGPT: https://openai.com/blog/chatgpt
 
+  - name: openai/gpt-3.5-turbo-instruct
+    display_name: GPT-3.5 Turbo Instruct
+    description: Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-09-18
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: openai/gpt-3.5-turbo-0301
     display_name: GPT-3.5 Turbo (0301)
-    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-03-01
@@ -1058,17 +1624,36 @@ models:
 
   - name: openai/gpt-3.5-turbo-0613
     display_name: GPT-3.5 Turbo (0613)
-    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-06-13
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
-
+  - name: openai/gpt-3.5-turbo-1106
+    display_name: GPT-3.5 Turbo (1106)
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-11-06.
+    creator_organization_name: OpenAI
+    access: limited
+    # Actual release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-3.5-turbo-0125
+    display_name: gpt-3.5-turbo-0125
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
+    creator_organization_name: OpenAI
+    access: limited
+    # Release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    # The actual release date is unclear - it was described as "next week".
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: openai/gpt-3.5-turbo-16k-0613
     display_name: gpt-3.5-turbo-16k-0613
-    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
+    description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-06-13
@@ -1079,7 +1664,7 @@ models:
 
   - name: openai/gpt-4-1106-preview
     display_name: GPT-4 Turbo (1106 preview)
-    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from
+    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2023-11-06.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-11-06
@@ -1087,7 +1672,7 @@ models:
 
   - name: openai/gpt-4-0314
     display_name: GPT-4 (0314)
-    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from
+    description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-03-14.
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-03-14
@@ -1117,6 +1702,31 @@ models:
     release_date: 2023-06-13
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: openai/gpt-4-0125-preview
+    display_name: GPT-4 Turbo (0125 preview)
+    description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2024-01-25. This snapshot is intended to reduce cases of “laziness” where the model doesn’t complete a task.
+    creator_organization_name: OpenAI
+    access: limited
+    # Actual release blog post was published on 2024-01-25:
+    # https://openai.com/blog/new-embedding-models-and-api-updates
+    release_date: 2024-01-25
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-turbo-2024-04-09
+    display_name: GPT-4 Turbo (2024-04-09)
+    description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/gpt-4-vision-preview
+    display_name: GPT-4V (preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat but works well for traditional completions tasks.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   ## Codex Models
   # DEPRECATED: Codex models have been shut down on March 23 2023.
@@ -1198,7 +1808,109 @@ models:
     release_date: 2022-12-15 # Blog post date
     tags: [TEXT_SIMILARITY_MODEL_TAG]
 
+  # Text-to-image models
+  - name: openai/dall-e-2
+    display_name: DALL-E 2 (3.5B)
+    description: DALL-E 2 is an encoder-decoder-based latent diffusion model trained on large-scale paired text-image datasets. The model is available via the OpenAI API ([paper](https://arxiv.org/abs/2204.06125)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 3500000000
+    release_date: 2022-04-13
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
 
+  - name: openai/dall-e-3
+    display_name: DALL-E 3
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-natural
+    display_name: DALL-E 3 (natural style)
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-hd
+    display_name: DALL-E 3 HD
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  - name: openai/dall-e-3-hd-natural
+    display_name: DALL-E 3 HD (natural style)
+    description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+    creator_organization_name: OpenAI
+    access: limited
+    num_parameters: 0
+    release_date: 2023-11-06
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+  # Qwen
+
+  - name: qwen/qwen-7b
+    display_name: Qwen
+    description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-7b
+    display_name: Qwen1.5 (7B)
+    description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-14b
+    display_name: Qwen1.5 (14B)
+    description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-14B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-32b
+    display_name: Qwen1.5 (32B)
+    description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-32B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen1.5-72b
+    display_name: Qwen1.5 (72B)
+    description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-72B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, code, etc.
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: qwen/qwen-vl
+    display_name: Qwen-VL
+    description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2023-08-24
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: qwen/qwen-vl-chat
+    display_name: Qwen-VL Chat
+    description: Chat version of the visual multimodal model Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
+    creator_organization_name: Alibaba Cloud
+    access: open
+    release_date: 2023-08-24
+    tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   # Salesforce
   - name: salesforce/codegen # NOT SUPPORTED
@@ -1351,6 +2063,16 @@ models:
 
 
   # Tsinghua
+
+  - name: thudm/cogview2
+    display_name: CogView2 (6B)
+    description: CogView2 is a hierarchical transformer (6B-9B-9B parameters) for text-to-image generation that supports both English and Chinese input text ([paper](https://arxiv.org/abs/2105.13290))
+    creator_organization_name: Tsinghua
+    access: open
+    num_parameters: 6000000000
+    release_date: 2022-06-15
+    tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
   - name: tsinghua/glm
     display_name: GLM (130B)
     description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)).