crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
helm/config/model_metadata.yaml
@@ -0,0 +1,2201 @@
1
+ # This file defines all the models officially supported by the Helm API.
2
+ # The model names here should match the model names in model_deployments.yaml.
3
+
4
+ # If you want to add a new model, you can technically do it here but we recommend
5
+ # you to do it in prod_env/model_metadata.yaml instead.
6
+
7
+ # Follow the template of this file to add a new model. You can copy paste this to get started:
8
+ # # This file contains the metadata for private models
9
+ # models: [] # Leave empty to disable private models
10
+
11
+
12
+ models:
13
+
14
+ - name: simple/model1
15
+ display_name: Simple Model 1
16
+ description: This is a test model.
17
+ creator_organization_name: Helm
18
+ access: open
19
+ release_date: 2023-01-01
20
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
21
+
22
+ # Adobe
23
+ - name: adobe/giga-gan
24
+ display_name: GigaGAN (1B)
25
+ description: GigaGAN is a GAN model that produces high-quality images extremely quickly. The model was trained on text and image pairs from LAION2B-en and COYO-700M. ([paper](https://arxiv.org/abs/2303.05511)).
26
+ creator_organization_name: Adobe
27
+ access: limited
28
+ num_parameters: 1000000000
29
+ release_date: 2023-06-22
30
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
31
+
32
+
33
+ # AI21 Labs
34
+ - name: ai21/j1-jumbo # DEPRECATED
35
+ display_name: J1-Jumbo v1 (178B)
36
+ description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
37
+ creator_organization_name: AI21 Labs
38
+ access: limited
39
+ num_parameters: 178000000000
40
+ release_date: 2021-08-11
41
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
42
+
43
+ - name: ai21/j1-large # DEPRECATED
44
+ display_name: J1-Large v1 (7.5B)
45
+ description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
46
+ creator_organization_name: AI21 Labs
47
+ access: limited
48
+ num_parameters: 7500000000
49
+ release_date: 2021-08-11
50
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
51
+
52
+ - name: ai21/j1-grande # DEPRECATED
53
+ display_name: J1-Grande v1 (17B)
54
+ description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
55
+ creator_organization_name: AI21 Labs
56
+ access: limited
57
+ num_parameters: 17000000000
58
+ release_date: 2022-05-03
59
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
60
+
61
+ - name: ai21/j1-grande-v2-beta # DEPRECATED
62
+ display_name: J1-Grande v2 beta (17B)
63
+ description: Jurassic-1 Grande v2 beta (17B parameters)
64
+ creator_organization_name: AI21 Labs
65
+ access: limited
66
+ num_parameters: 17000000000
67
+ release_date: 2022-10-28
68
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
69
+
70
+ - name: ai21/j2-jumbo
71
+ display_name: Jurassic-2 Jumbo (178B)
72
+ description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
73
+ creator_organization_name: AI21 Labs
74
+ access: limited
75
+ num_parameters: 178000000000
76
+ release_date: 2023-03-09
77
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
78
+
79
+ - name: ai21/j2-large
80
+ display_name: Jurassic-2 Large (7.5B)
81
+ description: Jurassic-2 Large (7.5B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
82
+ creator_organization_name: AI21 Labs
83
+ access: limited
84
+ num_parameters: 7500000000
85
+ release_date: 2023-03-09
86
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
87
+
88
+ - name: ai21/j2-grande
89
+ display_name: Jurassic-2 Grande (17B)
90
+ description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
91
+ creator_organization_name: AI21 Labs
92
+ access: limited
93
+ num_parameters: 17000000000
94
+ release_date: 2023-03-09
95
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
96
+
97
+ # TODO(1524): Change AI21 model names
98
+ # - j2-jumbo -> j2-ultra
99
+ # - j2-grande -> j2-mid
100
+ # - j2-large -> j2-light
101
+
102
+
103
+
104
+ # Aleph Alpha
105
+ # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
106
+ # TODO: add Luminous World when it's released
107
+ - name: AlephAlpha/luminous-base
108
+ display_name: Luminous Base (13B)
109
+ description: Luminous Base (13B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
110
+ creator_organization_name: Aleph Alpha
111
+ access: limited
112
+ num_parameters: 13000000000
113
+ # TODO: get exact release date
114
+ release_date: 2022-01-01
115
+ # Does not support echo
116
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
117
+
118
+ - name: AlephAlpha/luminous-extended
119
+ display_name: Luminous Extended (30B)
120
+ description: Luminous Extended (30B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
121
+ creator_organization_name: Aleph Alpha
122
+ access: limited
123
+ num_parameters: 30000000000
124
+ release_date: 2022-01-01
125
+ # Does not support echo
126
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
127
+
128
+ - name: AlephAlpha/luminous-supreme
129
+ display_name: Luminous Supreme (70B)
130
+ description: Luminous Supreme (70B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
131
+ creator_organization_name: Aleph Alpha
132
+ access: limited
133
+ num_parameters: 70000000000
134
+ release_date: 2022-01-01
135
+ # Does not support echo.
136
+ # Currently, only Luminous-extended and Luminous-base support multimodal inputs
137
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
138
+
139
+ # TODO: Uncomment when luminous-world is released.
140
+ # - name: AlephAlpha/luminous-world # Not released yet.
141
+ # display_name: Luminous World (178B)
142
+ # description: Luminous World (178B parameters) ([docs](https://docs.aleph-alpha.com/docs/introduction/luminous/))
143
+ # creator_organization_name: Aleph Alpha
144
+ # access: limited
145
+ # num_parameters: TBD
146
+ # release_date: TBD
147
+ # # Does not support echo.
148
+ # tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
149
+
150
+ - name: AlephAlpha/m-vader
151
+ display_name: MultiFusion (13B)
152
+ description: MultiFusion is a multimodal, multilingual diffusion model that extend the capabilities of Stable Diffusion v1.4 by integrating different pre-trained modules, which transfers capabilities to the downstream model ([paper](https://arxiv.org/abs/2305.15296))
153
+ creator_organization_name: Aleph Alpha
154
+ access: limited
155
+ num_parameters: 13000000000
156
+ release_date: 2023-05-24
157
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
158
+
159
+
160
+ # Amazon
161
+ # References for Amazon Titan models:
162
+ # - https://aws.amazon.com/bedrock/titan/
163
+ # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
164
+ # - https://aws.amazon.com/about-aws/whats-new/2023/11/amazon-titan-models-express-lite-bedrock/
165
+ - name: amazon/titan-text-lite-v1
166
+ display_name: Amazon Titan Text Lite
167
+ description: Amazon Titan Text Lite is a lightweight, efficient model perfect for fine-tuning English-language tasks like summarization and copywriting. It caters to customers seeking a smaller, cost-effective, and highly customizable model. It supports various formats, including text generation, code generation, rich text formatting, and orchestration (agents). Key model attributes encompass fine-tuning, text generation, code generation, and rich text formatting.
168
+ creator_organization_name: Amazon
169
+ access: limited
170
+ release_date: 2023-11-29
171
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
172
+
173
+ - name: amazon/titan-tg1-large
174
+ display_name: Amazon Titan Large
175
+ description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
176
+ creator_organization_name: Amazon
177
+ access: limited
178
+ release_date: 2023-11-29
179
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
180
+
181
+ - name: amazon/titan-text-express-v1
182
+ display_name: Amazon Titan Text Express
183
+ description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
184
+ creator_organization_name: Amazon
185
+ access: limited
186
+ release_date: 2023-11-29
187
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
188
+
189
+
190
+ # Anthropic
191
+ - name: anthropic/claude-v1.3
192
+ display_name: Anthropic Claude v1.3
193
+ description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
194
+ creator_organization_name: Anthropic
195
+ access: limited
196
+ num_parameters: 52000000000
197
+ release_date: 2023-03-17
198
+ tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
199
+
200
+ - name: anthropic/claude-instant-v1
201
+ display_name: Anthropic Claude Instant V1
202
+ description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
203
+ creator_organization_name: Anthropic
204
+ access: limited
205
+ release_date: 2023-03-17
206
+ tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
207
+
208
+ - name: anthropic/claude-instant-1.2
209
+ display_name: Anthropic Claude Instant 1.2
210
+ description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
211
+ creator_organization_name: Anthropic
212
+ access: limited
213
+ release_date: 2023-08-09
214
+ tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
215
+
216
+ - name: anthropic/claude-2.0
217
+ display_name: Anthropic Claude 2.0
218
+ description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
219
+ creator_organization_name: Anthropic
220
+ access: limited
221
+ release_date: 2023-07-11
222
+ tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
223
+
224
+ - name: anthropic/claude-2.1
225
+ display_name: Anthropic Claude 2.1
226
+ description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
227
+ creator_organization_name: Anthropic
228
+ access: limited
229
+ release_date: 2023-11-21
230
+ tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
231
+
232
+ - name: anthropic/claude-3-haiku-20240307
233
+ display_name: Claude 3 Haiku (20240307)
234
+ description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
235
+ creator_organization_name: Anthropic
236
+ access: limited
237
+ release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
238
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
239
+
240
+ - name: anthropic/claude-3-sonnet-20240229
241
+ display_name: Claude 3 Sonnet (20240229)
242
+ description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
243
+ creator_organization_name: Anthropic
244
+ access: limited
245
+ release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
246
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
247
+
248
+ - name: anthropic/claude-3-opus-20240229
249
+ display_name: Claude 3 Opus (20240229)
250
+ description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
251
+ creator_organization_name: Anthropic
252
+ access: limited
253
+ release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
254
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
255
+
256
+ # DEPRECATED: Please do not use.
257
+ - name: anthropic/stanford-online-all-v4-s3
258
+ display_name: Anthropic-LM v4-s3 (52B)
259
+ description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
260
+ creator_organization_name: Anthropic
261
+ access: closed
262
+ num_parameters: 52000000000
263
+ release_date: 2021-12-01
264
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
265
+
266
+
267
+
268
+ # Berkeley
269
+ - name: berkeley/koala-13b # NOT SUPPORTED
270
+ display_name: Koala (13B)
271
+ description: Koala (13B) is a chatbot fine-tuned from Llama (13B) on dialogue data gathered from the web. ([blog post](https://bair.berkeley.edu/blog/2023/04/03/koala/))
272
+ creator_organization_name: UC Berkeley
273
+ access: open
274
+ num_parameters: 13000000000
275
+ release_date: 2022-04-03
276
+ tags: [] # TODO: add tags
277
+
278
+
279
+
280
+ # BigScience
281
+ - name: bigscience/bloom
282
+ display_name: BLOOM (176B)
283
+ description: BLOOM (176B parameters) is an autoregressive model trained on 46 natural languages and 13 programming languages ([paper](https://arxiv.org/pdf/2211.05100.pdf)).
284
+ creator_organization_name: BigScience
285
+ access: open
286
+ num_parameters: 176000000000
287
+ release_date: 2022-06-28
288
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
289
+
290
+ - name: bigscience/bloomz # NOT SUPPORTED
291
+ display_name: BLOOMZ (176B)
292
+ description: BLOOMZ (176B parameters) is BLOOM that has been fine-tuned on natural language instructions ([details](https://huggingface.co/bigscience/bloomz)).
293
+ creator_organization_name: BigScience
294
+ access: open
295
+ num_parameters: 176000000000
296
+ release_date: 2022-11-03
297
+ tags: [] # TODO: add tags
298
+
299
+ - name: bigscience/t0pp
300
+ display_name: T0pp (11B)
301
+ description: T0pp (11B parameters) is an encoder-decoder model trained on a large set of different tasks specified in natural language prompts ([paper](https://arxiv.org/pdf/2110.08207.pdf)).
302
+ creator_organization_name: BigScience
303
+ access: open
304
+ num_parameters: 11000000000
305
+ release_date: 2021-10-15
306
+ # Does not support echo.
307
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
308
+
309
+
310
+
311
+ # BigCode
312
+ - name: bigcode/santacoder
313
+ display_name: SantaCoder (1.1B)
314
+ description: SantaCoder (1.1B parameters) model trained on the Python, Java, and JavaScript subset of The Stack (v1.1) ([model card](https://huggingface.co/bigcode/santacoder)).
315
+ creator_organization_name: BigCode
316
+ access: open
317
+ num_parameters: 1100000000
318
+ release_date: 2023-01-09 # ArXiv submission date
319
+ tags: [CODE_MODEL_TAG]
320
+
321
+ - name: bigcode/starcoder
322
+ display_name: StarCoder (15.5B)
323
+ description: The StarCoder (15.5B parameter) model trained on 80+ programming languages from The Stack (v1.2) ([model card](https://huggingface.co/bigcode/starcoder)).
324
+ creator_organization_name: BigCode
325
+ access: open
326
+ num_parameters: 15500000000
327
+ release_date: 2023-05-09 # ArXiv submission date
328
+ tags: [CODE_MODEL_TAG]
329
+
330
+
331
+
332
+ # Cerebras Systems
333
+ - name: cerebras/cerebras-gpt-6.7b # NOT SUPPORTED
334
+ display_name: Cerebras GPT (6.7B)
335
+ description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf))
336
+ creator_organization_name: Cerebras
337
+ access: limited
338
+ num_parameters: 6700000000
339
+ release_date: 2023-04-06
340
+ tags: [] # TODO: add tags
341
+
342
+ - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
343
+ display_name: Cerebras GPT (13B)
344
+ description: Cerebras GPT is a family of open compute-optimal language models scaled from 111M to 13B parameters trained on the Eleuther Pile. ([paper](https://arxiv.org/pdf/2304.03208.pdf))
345
+ creator_organization_name: Cerebras
346
+ access: limited
347
+ num_parameters: 13000000000
348
+ release_date: 2023-04-06
349
+ tags: [] # TODO: add tags
350
+
351
+
352
+
353
+ # Cohere
354
+ # Model versioning and the possible versions are not documented here:
355
+ # https://docs.cohere.ai/generate-reference#model-optional.
356
+ # So, instead, we got the names of the models from the Cohere Playground.
357
+ #
358
+ # Note that their tokenizer and model were trained on English text and
359
+ # they do not have a dedicated decode API endpoint, so the adaptation
360
+ # step for language modeling fails for certain Scenarios:
361
+ # the_pile:subset=ArXiv
362
+ # the_pile:subset=Github
363
+ # the_pile:subset=PubMed Central
364
+
365
+ # TODO: Consider renaming to new model names.
366
+ - name: cohere/xlarge-20220609
367
+ display_name: Cohere xlarge v20220609 (52.4B)
368
+ description: Cohere xlarge v20220609 (52.4B parameters)
369
+ creator_organization_name: Cohere
370
+ access: limited
371
+ num_parameters: 52400000000
372
+ release_date: 2022-06-09
373
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
374
+
375
+ - name: cohere/large-20220720 # DEPRECATED
376
+ display_name: Cohere large v20220720 (13.1B)
377
+ description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022.
378
+ creator_organization_name: Cohere
379
+ access: limited
380
+ num_parameters: 13100000000
381
+ release_date: 2022-07-20
382
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
383
+
384
+ - name: cohere/medium-20220720
385
+ display_name: Cohere medium v20220720 (6.1B)
386
+ description: Cohere medium v20220720 (6.1B parameters)
387
+ creator_organization_name: Cohere
388
+ access: limited
389
+ num_parameters: 6100000000
390
+ release_date: 2022-07-20
391
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
392
+
393
+ - name: cohere/small-20220720 # DEPRECATED
394
+ display_name: Cohere small v20220720 (410M)
395
+ description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022.
396
+ creator_organization_name: Cohere
397
+ access: limited
398
+ num_parameters: 410000000
399
+ release_date: 2022-07-20
400
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
401
+
402
+ - name: cohere/xlarge-20221108
403
+ display_name: Cohere xlarge v20221108 (52.4B)
404
+ description: Cohere xlarge v20221108 (52.4B parameters)
405
+ creator_organization_name: Cohere
406
+ access: limited
407
+ num_parameters: 52400000000
408
+ release_date: 2022-11-08
409
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
410
+
411
+ - name: cohere/medium-20221108 # DEPRECATED
412
+ display_name: Cohere medium v20221108 (6.1B)
413
+ description: Cohere medium v20221108 (6.1B parameters)
414
+ creator_organization_name: Cohere
415
+ access: limited
416
+ num_parameters: 6100000000
417
+ release_date: 2022-11-08
418
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
419
+
420
+ - name: cohere/command-medium-beta # DEPRECATED
421
+ display_name: Cohere Command beta (6.1B)
422
+ description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well to instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
423
+ creator_organization_name: Cohere
424
+ access: limited
425
+ num_parameters: 6100000000
426
+ release_date: 2022-11-08
427
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
428
+
429
+ - name: cohere/command-xlarge-beta # DEPRECATED
430
+ display_name: Cohere Command beta (52.4B)
431
+ description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well to instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
432
+ creator_organization_name: Cohere
433
+ access: limited
434
+ num_parameters: 52400000000
435
+ release_date: 2022-11-08
436
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
437
+
438
+ - name: cohere/command
439
+ display_name: Cohere Command
440
+ description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications ([docs](https://docs.cohere.com/reference/generate), [changelog](https://docs.cohere.com/changelog)).
441
+ creator_organization_name: Cohere
442
+ access: limited
443
+ release_date: 2023-09-29
444
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
445
+
446
+ - name: cohere/command-light
447
+ display_name: Cohere Command Light
448
+ description: Command Light is a smaller, faster version of Command, Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications ([docs](https://docs.cohere.com/reference/generate), [changelog](https://docs.cohere.com/changelog)).
449
+ creator_organization_name: Cohere
450
+ access: limited
451
+ release_date: 2023-09-29
452
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
453
+
454
+ # Craiyon
455
+ - name: craiyon/dalle-mini
456
+ display_name: DALL-E mini (0.4B)
457
+ description: DALL-E mini is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
458
+ creator_organization_name: Craiyon
459
+ access: open
460
+ num_parameters: 400000000
461
+ release_date: 2022-04-21
462
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
463
+
464
+ - name: craiyon/dalle-mega
465
+ display_name: DALL-E mega (2.6B)
466
+ description: DALL-E mega is an open-source text-to-image model that attempts to reproduce OpenAI's DALL-E 1 ([code](https://github.com/borisdayma/dalle-mini)).
467
+ creator_organization_name: Craiyon
468
+ access: open
469
+ num_parameters: 2600000000
470
+ release_date: 2022-04-21
471
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
472
+
473
+ # DeepFloyd
474
+ - name: DeepFloyd/IF-I-M-v1.0
475
+ display_name: DeepFloyd IF Medium (0.4B)
476
+ description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
477
+ creator_organization_name: DeepFloyd
478
+ access: open
479
+ num_parameters: 400000000
480
+ release_date: 2023-04-28
481
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
482
+
483
+ - name: DeepFloyd/IF-I-L-v1.0
484
+ display_name: DeepFloyd IF Large (0.9B)
485
+ description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
486
+ creator_organization_name: DeepFloyd
487
+ access: open
488
+ num_parameters: 900000000
489
+ release_date: 2023-04-28
490
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
491
+
492
+ - name: DeepFloyd/IF-I-XL-v1.0
493
+ display_name: DeepFloyd IF X-Large (4.3B)
494
+ description: DeepFloyd-IF is a pixel-based text-to-image triple-cascaded diffusion model with state-of-the-art photorealism and language understanding (paper coming soon).
495
+ creator_organization_name: DeepFloyd
496
+ access: open
497
+ num_parameters: 4300000000
498
+ release_date: 2023-04-28
499
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
500
+
501
+
502
+ # Databricks
503
+ - name: databricks/dolly-v2-3b
504
+ display_name: Dolly V2 (3B)
505
+ description: Dolly V2 (3B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-2.8b.
506
+ creator_organization_name: Databricks
507
+ access: open
508
+ num_parameters: 2517652480
509
+ release_date: 2023-04-12
510
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
511
+
512
+ - name: databricks/dolly-v2-7b
513
+ display_name: Dolly V2 (7B)
514
+ description: Dolly V2 (7B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-6.9b.
515
+ creator_organization_name: Databricks
516
+ access: open
517
+ num_parameters: 6444163072
518
+ release_date: 2023-04-12
519
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
520
+
521
+ - name: databricks/dolly-v2-12b
522
+ display_name: Dolly V2 (12B)
523
+ description: Dolly V2 (12B) is an instruction-following large language model trained on the Databricks machine learning platform. It is based on pythia-12b.
524
+ creator_organization_name: Databricks
525
+ access: open
526
+ num_parameters: 11327027200
527
+ release_date: 2023-04-12
528
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
529
+
530
+ - name: databricks/dbrx-instruct
531
+ display_name: DBRX Instruct
532
+ description: DBRX is a large language model with a fine-grained mixture-of-experts (MoE) architecture that uses 16 experts and chooses 4. It has 132B total parameters, of which 36B parameters are active on any input. ([blog post](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm))
533
+ creator_organization_name: Databricks
534
+ access: open
535
+ num_parameters: 132000000000
536
+ release_date: 2024-03-27
537
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
538
+
539
+
540
+ # DeepMind
541
+ - name: deepmind/gopher # NOT SUPPORTED
542
+ display_name: Gopher (280B)
543
+ description: Gopher (280B parameters) ([paper](https://arxiv.org/pdf/2112.11446.pdf)).
544
+ creator_organization_name: DeepMind
545
+ access: closed
546
+ num_parameters: 280000000000
547
+ release_date: 2021-12-08
548
+ tags: [] # TODO: add tags
549
+
550
+ - name: deepmind/chinchilla # NOT SUPPORTED
551
+ display_name: Chinchilla (70B)
552
+ description: Chinchilla (70B parameters) ([paper](https://arxiv.org/pdf/2203.15556.pdf)).
553
+ creator_organization_name: DeepMind
554
+ access: closed
555
+ num_parameters: 70000000000
556
+ release_date: 2022-03-31
557
+ tags: [] # TODO: add tags
558
+
559
+
560
+ # Deepseek
561
+ - name: deepseek-ai/deepseek-llm-67b-chat
562
+ display_name: DeepSeek Chat (67B)
563
+ description: DeepSeek Chat is an open-source language model trained on 2 trillion tokens of English and Chinese text, and fine-tuned with supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
564
+ creator_organization_name: DeepSeek
565
+ access: open
566
+ num_parameters: 67000000000
567
+ release_date: 2024-01-05
568
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
569
+
570
+
571
+ # EleutherAI
572
+ - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
573
+ display_name: GPT-J (6B)
574
+ description: GPT-J (6B parameters) autoregressive language model trained on The Pile ([details](https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/)).
575
+ creator_organization_name: EleutherAI
576
+ access: open
577
+ num_parameters: 6000000000
578
+ release_date: 2021-06-04
579
+ # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
580
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
581
+
582
+ - name: eleutherai/gpt-neox-20b # Served by GooseAi and Together.
583
+ display_name: GPT-NeoX (20B)
584
+ description: GPT-NeoX (20B parameters) autoregressive language model trained on The Pile ([paper](https://arxiv.org/pdf/2204.06745.pdf)).
585
+ creator_organization_name: EleutherAI
586
+ access: open
587
+ num_parameters: 20000000000
588
+ release_date: 2022-02-02
589
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
590
+
591
+ - name: eleutherai/pythia-1b-v0
592
+ display_name: Pythia (1B)
593
+ description: Pythia (1B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
594
+ creator_organization_name: EleutherAI
595
+ access: open
596
+ num_parameters: 805736448
597
+ release_date: 2023-02-13
598
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
599
+
600
+ - name: eleutherai/pythia-2.8b-v0
601
+ display_name: Pythia (2.8B)
602
+ description: Pythia (2.8B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
603
+ creator_organization_name: EleutherAI
604
+ access: open
605
+ num_parameters: 2517652480
606
+ release_date: 2023-02-13
607
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
608
+
609
+ - name: eleutherai/pythia-6.9b
610
+ display_name: Pythia (6.9B)
611
+ description: Pythia (6.9B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
612
+ creator_organization_name: EleutherAI
613
+ access: open
614
+ num_parameters: 6444163072
615
+ release_date: 2023-02-13
616
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
617
+
618
+ - name: eleutherai/pythia-12b-v0
619
+ display_name: Pythia (12B)
620
+ description: Pythia (12B parameters). The Pythia project combines interpretability analysis and scaling laws to understand how knowledge develops and evolves during training in autoregressive transformers.
621
+ creator_organization_name: EleutherAI
622
+ access: open
623
+ num_parameters: 11327027200
624
+ release_date: 2023-02-13
625
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
626
+
627
+
628
+
629
+ # Google
630
+ - name: google/t5-11b
631
+ display_name: T5 (11B)
632
+ description: T5 (11B parameters) is an encoder-decoder model trained on a multi-task mixture, where each task is converted into a text-to-text format ([paper](https://arxiv.org/pdf/1910.10683.pdf)).
633
+ creator_organization_name: Google
634
+ access: open
635
+ num_parameters: 11000000000
636
+ release_date: 2019-10-23
637
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
638
+
639
+ - name: google/ul2
640
+ display_name: UL2 (20B)
641
+ description: UL2 (20B parameters) is an encoder-decoder model trained on the C4 corpus. It's similar to T5 but trained with a different objective and slightly different scaling knobs ([paper](https://arxiv.org/pdf/2205.05131.pdf)).
642
+ creator_organization_name: Google
643
+ access: open
644
+ num_parameters: 20000000000
645
+ release_date: 2022-05-10
646
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, NLG_PREFIX_TAG]
647
+
648
+ - name: google/flan-t5-xxl
649
+ display_name: Flan-T5 (11B)
650
+ description: Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).
651
+ creator_organization_name: Google
652
+ access: open
653
+ num_parameters: 11000000000
654
+ release_date: 2022-12-06 # Paper date
655
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
656
+
657
+ - name: google/palm # NOT SUPPORTED
658
+ display_name: PaLM (540B)
659
+ description: Pathways Language Model (540B parameters) is trained using 6144 TPU v4 chips ([paper](https://arxiv.org/pdf/2204.02311.pdf)).
660
+ creator_organization_name: Google
661
+ access: closed
662
+ num_parameters: 540000000000
663
+ release_date: 2023-03-01 # was first announced in April 2022 but remained private.
664
+ tags: [] # TODO: add tags
665
+
666
+ # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
667
+ - name: google/gemini-pro
668
+ display_name: Gemini Pro
669
+ description: Gemini Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
670
+ creator_organization_name: Google
671
+ access: limited
672
+ release_date: 2023-12-13
673
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
674
+
675
+ - name: google/gemini-1.0-pro-001
676
+ display_name: Gemini 1.0 Pro
677
+ description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
678
+ creator_organization_name: Google
679
+ access: limited
680
+ release_date: 2023-12-13
681
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
682
+
683
+ # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
684
+ - name: google/gemini-pro-vision
685
+ display_name: Gemini Pro Vision
686
+ description: Gemini Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
687
+ creator_organization_name: Google
688
+ access: limited
689
+ release_date: 2023-12-13
690
+ tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG]
691
+
692
+ - name: google/gemini-1.0-pro-vision-001
693
+ display_name: Gemini 1.0 Pro Vision
694
+ description: Gemini 1.0 Pro Vision is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
695
+ creator_organization_name: Google
696
+ access: limited
697
+ release_date: 2023-12-13
698
+ tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
699
+
700
+ - name: google/gemini-1.5-pro-preview-0409
701
+ display_name: Gemini 1.5 Pro
702
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
703
+ creator_organization_name: Google
704
+ access: limited
705
+ release_date: 2024-04-10
706
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
707
+
708
+ - name: google/gemma-2b
709
+ display_name: Gemma (2B)
710
+ # TODO: Fill in Gemma description.
711
+ description: TBD
712
+ creator_organization_name: Google
713
+ access: open
714
+ release_date: 2024-02-21
715
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
716
+
717
+ - name: google/gemma-2b-it
718
+ display_name: Gemma Instruct (2B)
719
+ # TODO: Fill in Gemma description.
720
+ description: TBD
721
+ creator_organization_name: Google
722
+ access: open
723
+ release_date: 2024-02-21
724
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
725
+
726
+ - name: google/gemma-7b
727
+ display_name: Gemma (7B)
728
+ # TODO: Fill in Gemma description.
729
+ description: TBD
730
+ creator_organization_name: Google
731
+ access: open
732
+ release_date: 2024-02-21
733
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
734
+
735
+ - name: google/gemma-7b-it
736
+ display_name: Gemma Instruct (7B)
737
+ # TODO: Fill in Gemma description.
738
+ description: TBD
739
+ creator_organization_name: Google
740
+ access: open
741
+ release_date: 2024-02-21
742
+ # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
743
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
744
+
745
+ - name: google/text-bison@001
746
+ display_name: PaLM-2 (Bison)
747
+ description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
748
+ creator_organization_name: Google
749
+ access: limited
750
+ release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
751
+ tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
752
+
753
+ - name: google/text-bison@002
754
+ display_name: PaLM-2 (Bison)
755
+ description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
756
+ creator_organization_name: Google
757
+ access: limited
758
+ release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
759
+ tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
760
+
761
+ - name: google/text-bison-32k
762
+ display_name: PaLM-2 (Bison)
763
+ description: The best value PaLM model with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
764
+ creator_organization_name: Google
765
+ access: limited
766
+ release_date: 2023-06-07 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
767
+ tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
768
+
769
+ - name: google/text-unicorn@001
770
+ display_name: PaLM-2 (Unicorn)
771
+ description: The largest model in the PaLM family. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
772
+ creator_organization_name: Google
773
+ access: limited
774
+ release_date: 2023-11-30 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/text#model_versions
775
+ tags: [TEXT_MODEL_TAG, GOOGLE_PALM_2_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
776
+
777
+ - name: google/code-bison@001
778
+ display_name: Codey PaLM-2 (Bison)
779
+ description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
780
+ creator_organization_name: Google
781
+ access: limited
782
+ release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
783
+ tags: [CODE_MODEL_TAG]
784
+
785
+ - name: google/code-bison@002
786
+ display_name: Codey PaLM-2 (Bison)
787
+ description: A model fine-tuned to generate code based on a natural language description of the desired code. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
788
+ creator_organization_name: Google
789
+ access: limited
790
+ release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
791
+ tags: [CODE_MODEL_TAG]
792
+
793
+ - name: google/code-bison-32k
794
+ display_name: Codey PaLM-2 (Bison)
795
+ description: Codey with a 32K context. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
796
+ creator_organization_name: Google
797
+ access: limited
798
+ release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
799
+ tags: [CODE_MODEL_TAG]
800
+
801
+
802
+
803
+ # HuggingFace
804
+ - name: HuggingFaceM4/idefics-9b
805
+ display_name: IDEFICS (9B)
806
+ description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
807
+ creator_organization_name: HuggingFace
808
+ access: open
809
+ num_parameters: 9000000000
810
+ release_date: 2023-08-22
811
+ tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
812
+
813
+ - name: HuggingFaceM4/idefics-9b-instruct
814
+ display_name: IDEFICS instruct (9B)
815
+ description: IDEFICS instruct (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
816
+ creator_organization_name: HuggingFace
817
+ access: open
818
+ num_parameters: 9000000000
819
+ release_date: 2023-08-22
820
+ tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
821
+
822
+ - name: HuggingFaceM4/idefics-80b
823
+ display_name: IDEFICS (80B)
824
+ description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
825
+ creator_organization_name: HuggingFace
826
+ access: open
827
+ num_parameters: 80000000000
828
+ release_date: 2023-08-22
829
+ tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
830
+
831
+ - name: HuggingFaceM4/idefics-80b-instruct
832
+ display_name: IDEFICS instruct (80B)
833
+ description: IDEFICS instruct (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
834
+ creator_organization_name: HuggingFace
835
+ access: open
836
+ num_parameters: 80000000000
837
+ release_date: 2023-08-22
838
+ tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
839
+
840
+ ## Text-to-Image Diffusion Models
841
+ - name: huggingface/dreamlike-diffusion-v1-0
842
+ display_name: Dreamlike Diffusion v1.0 (1B)
843
+ description: Dreamlike Diffusion v1.0 is Stable Diffusion v1.5 fine-tuned on high-quality art ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0))
844
+ creator_organization_name: dreamlike.art
845
+ access: open
846
+ num_parameters: 1000000000
847
+ release_date: 2023-03-08
848
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
849
+
850
+ - name: huggingface/dreamlike-photoreal-v2-0
851
+ display_name: Dreamlike Photoreal v2.0 (1B)
852
+ description: Dreamlike Photoreal v2.0 is a photorealistic model based on Stable Diffusion v1.5 ([HuggingFace model card](https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0))
853
+ creator_organization_name: dreamlike.art
854
+ access: open
855
+ num_parameters: 1000000000
856
+ release_date: 2022-11-23
857
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
858
+
859
+ - name: huggingface/openjourney-v1-0
860
+ display_name: Openjourney (1B)
861
+ description: Openjourney is an open-source Stable Diffusion model fine-tuned on Midjourney images ([HuggingFace model card](https://huggingface.co/prompthero/openjourney))
862
+ creator_organization_name: PromptHero
863
+ access: open
864
+ num_parameters: 1000000000
865
+ release_date: 2022-11-01 # TODO: get the exact date
866
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
867
+
868
+ - name: huggingface/openjourney-v2-0
869
+ display_name: Openjourney v2 (1B)
870
+ description: Openjourney v2 is an open-source Stable Diffusion model fine-tuned on Midjourney images. Openjourney v2 is now referred to as Openjourney v4 on Hugging Face ([HuggingFace model card](https://huggingface.co/prompthero/openjourney-v4)).
871
+ creator_organization_name: PromptHero
872
+ access: open
873
+ num_parameters: 1000000000
874
+ release_date: 2023-01-01 # TODO: get the exact date
875
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
876
+
877
+ - name: huggingface/promptist-stable-diffusion-v1-4
878
+ display_name: Promptist + Stable Diffusion v1.4 (1B)
879
+ description: Trained with human preferences, Promptist optimizes user input into model-preferred prompts for Stable Diffusion v1.4 ([paper](https://arxiv.org/abs/2212.09611))
880
+ creator_organization_name: Microsoft
881
+ access: open
882
+ num_parameters: 1000000000
883
+ release_date: 2022-12-19
884
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
885
+
886
+ - name: huggingface/redshift-diffusion
887
+ display_name: Redshift Diffusion (1B)
888
+ description: Redshift Diffusion is an open-source Stable Diffusion model fine-tuned on high-resolution 3D artworks ([HuggingFace model card](https://huggingface.co/nitrosocke/redshift-diffusion))
889
+ creator_organization_name: nitrosocke
890
+ access: open
891
+ num_parameters: 1000000000
892
+ release_date: 2022-11-29
893
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
894
+
895
+ - name: huggingface/stable-diffusion-safe-weak
896
+ display_name: Safe Stable Diffusion weak (1B)
897
+ description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
898
+ creator_organization_name: TU Darmstadt
899
+ access: open
900
+ num_parameters: 1000000000
901
+ release_date: 2022-11-09
902
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
903
+
904
+ - name: huggingface/stable-diffusion-safe-medium
905
+ display_name: Safe Stable Diffusion medium (1B)
906
+ description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
907
+ creator_organization_name: TU Darmstadt
908
+ access: open
909
+ num_parameters: 1000000000
910
+ release_date: 2022-11-09
911
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
912
+
913
+ - name: huggingface/stable-diffusion-safe-strong
914
+ display_name: Safe Stable Diffusion strong (1B)
915
+ description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
916
+ creator_organization_name: TU Darmstadt
917
+ access: open
918
+ num_parameters: 1000000000
919
+ release_date: 2022-11-09
920
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
921
+
922
+ - name: huggingface/stable-diffusion-safe-max
923
+ display_name: Safe Stable Diffusion max (1B)
924
+ description: Safe Stable Diffusion is an extension of Stable Diffusion that drastically reduces inappropriate content ([paper](https://arxiv.org/abs/2211.05105)).
925
+ creator_organization_name: TU Darmstadt
926
+ access: open
927
+ num_parameters: 1000000000
928
+ release_date: 2022-11-09
929
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
930
+
931
+ - name: huggingface/stable-diffusion-v1-4
932
+ display_name: Stable Diffusion v1.4 (1B)
933
+ description: Stable Diffusion v1.4 is a latent text-to-image diffusion model capable of generating photorealistic images given any text input ([paper](https://arxiv.org/abs/2112.10752))
934
+ creator_organization_name: Ludwig Maximilian University of Munich CompVis
935
+ access: open
936
+ num_parameters: 1000000000
937
+ release_date: 2022-08-01
938
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
939
+
940
+ - name: huggingface/stable-diffusion-v1-5
941
+ display_name: Stable Diffusion v1.5 (1B)
942
+ description: The Stable-Diffusion-v1-5 checkpoint was initialized with the weights of the Stable-Diffusion-v1-2 checkpoint and subsequently fine-tuned for 595k steps at resolution 512x512 on laion-aesthetics v2 5+, with 10% dropping of the text-conditioning to improve classifier-free guidance sampling ([paper](https://arxiv.org/abs/2112.10752))
943
+ creator_organization_name: Runway
944
+ access: open
945
+ num_parameters: 1000000000
946
+ release_date: 2022-10-20
947
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
948
+
949
+ - name: huggingface/stable-diffusion-v2-base
950
+ display_name: Stable Diffusion v2 base (1B)
951
+ description: The model is trained from scratch for 550k steps at resolution 256x256 on a subset of LAION-5B filtered for explicit pornographic material, using the LAION-NSFW classifier with punsafe=0.1 and an aesthetic score greater than 4.5. It is then further trained for 850k steps at resolution 512x512 on the same dataset, using only images with resolution greater than 512x512 ([paper](https://arxiv.org/abs/2112.10752))
952
+ creator_organization_name: Stability AI
953
+ access: open
954
+ num_parameters: 1000000000
955
+ release_date: 2022-11-23
956
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
957
+
958
+ - name: huggingface/stable-diffusion-v2-1-base
959
+ display_name: Stable Diffusion v2.1 base (1B)
960
+ description: This stable-diffusion-2-1-base model fine-tunes stable-diffusion-2-base with 220k extra steps taken, with punsafe=0.98 on the same dataset ([paper](https://arxiv.org/abs/2112.10752))
961
+ creator_organization_name: Stability AI
962
+ access: open
963
+ num_parameters: 1000000000
964
+ release_date: 2022-11-23
965
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
966
+
967
+ - name: huggingface/vintedois-diffusion-v0-1
968
+ display_name: Vintedois (22h) Diffusion model v0.1 (1B)
969
+ description: Vintedois (22h) Diffusion model v0.1 is Stable Diffusion v1.5 fine-tuned on a large number of high-quality images with simple prompts, so that it generates beautiful images without extensive prompt engineering ([HuggingFace model card](https://huggingface.co/22h/vintedois-diffusion-v0-1))
970
+ creator_organization_name: 22 Hours
971
+ access: open
972
+ num_parameters: 1000000000
973
+ release_date: 2022-12-27
974
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
975
+
976
+ - name: segmind/Segmind-Vega
977
+ display_name: Segmind Stable Diffusion (0.74B)
978
+ description: The Segmind-Vega Model is a distilled version of the Stable Diffusion XL (SDXL), offering a remarkable 70% reduction in size and an impressive 100% speedup while retaining high-quality text-to-image generation capabilities. Trained on diverse datasets, including Grit and Midjourney scrape data, it excels at creating a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/Segmind-Vega))
979
+ creator_organization_name: Segmind
980
+ access: open
981
+ num_parameters: 740000000
982
+ release_date: 2023-12-01
983
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
984
+
985
+ - name: segmind/SSD-1B
986
+ display_name: Segmind Stable Diffusion (1B)
987
+ description: The Segmind Stable Diffusion Model (SSD-1B) is a distilled 50% smaller version of the Stable Diffusion XL (SDXL), offering a 60% speedup while maintaining high-quality text-to-image generation capabilities. It has been trained on diverse datasets, including Grit and Midjourney scrape data, to enhance its ability to create a wide range of visual content based on textual prompts. ([HuggingFace model card](https://huggingface.co/segmind/SSD-1B))
988
+ creator_organization_name: Segmind
989
+ access: open
990
+ num_parameters: 1000000000
991
+ release_date: 2023-10-20
992
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
993
+
994
+ - name: stabilityai/stable-diffusion-xl-base-1.0
995
+ display_name: Stable Diffusion XL
996
+ description: Stable Diffusion XL (SDXL) consists of an ensemble of experts pipeline for latent diffusion. ([HuggingFace model card](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0))
997
+ creator_organization_name: Stability AI
998
+ access: open
999
+ num_parameters: 6600000000
1000
+ release_date: 2023-07-26
1001
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
1002
+
1003
+ # Kakao
1004
+ - name: kakaobrain/mindall-e
1005
+ display_name: minDALL-E (1.3B)
1006
+ description: minDALL-E, named after minGPT, is an autoregressive text-to-image generation model trained on 14 million image-text pairs ([code](https://github.com/kakaobrain/minDALL-E))
1007
+ creator_organization_name: Kakao
1008
+ access: open
1009
+ num_parameters: 1300000000
1010
+ release_date: 2021-12-13
1011
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
1012
+
1013
+ # Lexica
1014
+ - name: lexica/search-stable-diffusion-1.5
1015
+ display_name: Lexica Search with Stable Diffusion v1.5 (1B)
1016
+ description: Retrieves Stable Diffusion v1.5 images generated by Lexica users ([docs](https://lexica.art/docs)).
1017
+ creator_organization_name: Lexica
1018
+ access: open
1019
+ release_date: 2023-01-01
1020
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
1021
+
1022
+
1023
+ # Lightning AI
1024
+ - name: lightningai/lit-gpt
1025
+ display_name: Lit-GPT
1026
+ description: Lit-GPT is an optimized collection of open-source LLMs for finetuning and inference. It supports Falcon, Llama 2, Vicuna, LongChat, and other top-performing open-source large language models.
1027
+ creator_organization_name: Lightning AI
1028
+ access: open
1029
+ release_date: 2023-04-04
1030
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1031
+
1032
+
1033
+
1034
+ # LMSYS
1035
+ - name: lmsys/vicuna-7b-v1.3
1036
+ display_name: Vicuna v1.3 (7B)
1037
+ description: Vicuna v1.3 (7B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
1038
+ creator_organization_name: LMSYS
1039
+ access: open
1040
+ num_parameters: 7000000000
1041
+ release_date: 2023-06-22
1042
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1043
+
1044
+ - name: lmsys/vicuna-13b-v1.3
1045
+ display_name: Vicuna v1.3 (13B)
1046
+ description: Vicuna v1.3 (13B) is an open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.
1047
+ creator_organization_name: LMSYS
1048
+ access: open
1049
+ num_parameters: 13000000000
1050
+ release_date: 2023-06-22
1051
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1052
+
1053
+
1054
+
1055
+ # Meta
1056
+ - name: meta/opt-iml-175b # NOT SUPPORTED
1057
+ display_name: OPT-IML (175B)
1058
+ description: OPT-IML (175B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
1059
+ creator_organization_name: Meta
1060
+ access: open
1061
+ num_parameters: 175000000000
1062
+ release_date: 2022-12-22
1063
+ tags: [] # TODO: add tags
1064
+
1065
+ - name: meta/opt-iml-30b # NOT SUPPORTED
1066
+ display_name: OPT-IML (30B)
1067
+ description: OPT-IML (30B parameters) is a suite of decoder-only transformer LMs that are multi-task fine-tuned on 2000 datasets ([paper](https://arxiv.org/pdf/2212.12017.pdf)).
1068
+ creator_organization_name: Meta
1069
+ access: open
1070
+ num_parameters: 30000000000
1071
+ release_date: 2022-12-22
1072
+ tags: [] # TODO: add tags
1073
+
1074
+ - name: meta/opt-175b
1075
+ display_name: OPT (175B)
1076
+ description: Open Pre-trained Transformers (175B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
1077
+ creator_organization_name: Meta
1078
+ access: open
1079
+ num_parameters: 175000000000
1080
+ release_date: 2022-05-02
1081
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
1082
+
1083
+ - name: meta/opt-66b
1084
+ display_name: OPT (66B)
1085
+ description: Open Pre-trained Transformers (66B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
1086
+ creator_organization_name: Meta
1087
+ access: open
1088
+ num_parameters: 66000000000
1089
+ release_date: 2022-05-02
1090
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
1091
+
1092
+ - name: meta/opt-6.7b
1093
+ display_name: OPT (6.7B)
1094
+ description: Open Pre-trained Transformers (6.7B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
1095
+ creator_organization_name: Meta
1096
+ access: open
1097
+ num_parameters: 6700000000
1098
+ release_date: 2022-05-02
1099
+ # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
1100
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
1101
+
1102
+ - name: meta/opt-1.3b
1103
+ display_name: OPT (1.3B)
1104
+ description: Open Pre-trained Transformers (1.3B parameters) is a suite of decoder-only pre-trained transformers that are fully and responsibly shared with interested researchers ([paper](https://arxiv.org/pdf/2205.01068.pdf)).
1105
+ creator_organization_name: Meta
1106
+ access: open
1107
+ num_parameters: 1300000000
1108
+ release_date: 2022-05-02
1109
+ # TODO: The BUGGY_TEMP_0_TAG is a deployment related tag (Together).
1110
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, BUGGY_TEMP_0_TAG]
1111
+
1112
+ - name: meta/galactica-120b # NOT SUPPORTED
1113
+ display_name: Galactica (120B)
1114
+ description: Galactica (120B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
1115
+ creator_organization_name: Meta
1116
+ access: open
1117
+ num_parameters: 120000000000
1118
+ release_date: 2022-11-15
1119
+ tags: [] # TODO: add tags
1120
+
1121
+ - name: meta/galactica-30b # NOT SUPPORTED
1122
+ display_name: Galactica (30B)
1123
+ description: Galactica (30B parameters) is trained on 48 million papers, textbooks, lecture notes, compounds and proteins, scientific websites, etc. ([paper](https://galactica.org/static/paper.pdf)).
1124
+ creator_organization_name: Meta
1125
+ access: open
1126
+ num_parameters: 30000000000
1127
+ release_date: 2022-11-15
1128
+ tags: [] # TODO: add tags
1129
+
1130
+ - name: meta/llama-7b
1131
+ display_name: LLaMA (7B)
1132
+ description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
1133
+ creator_organization_name: Meta
1134
+ access: open
1135
+ num_parameters: 7000000000
1136
+ release_date: 2023-02-24
1137
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1138
+
1139
+ - name: meta/llama-13b
1140
+ display_name: LLaMA (13B)
1141
+ description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
1142
+ creator_organization_name: Meta
1143
+ access: open
1144
+ num_parameters: 13000000000
1145
+ release_date: 2023-02-24
1146
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1147
+
1148
+ - name: meta/llama-30b
1149
+ display_name: LLaMA (30B)
1150
+ description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
1151
+ creator_organization_name: Meta
1152
+ access: open
1153
+ num_parameters: 30000000000
1154
+ release_date: 2023-02-24
1155
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1156
+
1157
+ - name: meta/llama-65b
1158
+ display_name: LLaMA (65B)
1159
+ description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters.
1160
+ creator_organization_name: Meta
1161
+ access: open
1162
+ num_parameters: 65000000000
1163
+ release_date: 2023-02-24
1164
+ # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
1165
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1166
+
1167
+ - name: meta/llama-2-7b
1168
+ display_name: Llama 2 (7B)
1169
+ description: Llama 2 pretrained models are trained on 2 trillion tokens and have double the context length of Llama 1.
1170
+ creator_organization_name: Meta
1171
+ access: open
1172
+ num_parameters: 7000000000
1173
+ release_date: 2023-07-18
1174
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1175
+
1176
+ - name: meta/llama-2-13b
1177
+ display_name: Llama 2 (13B)
1178
+ description: Llama 2 pretrained models are trained on 2 trillion tokens and have double the context length of Llama 1.
1179
+ creator_organization_name: Meta
1180
+ access: open
1181
+ num_parameters: 13000000000
1182
+ release_date: 2023-07-18
1183
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1184
+
1185
+ - name: meta/llama-2-70b
1186
+ display_name: Llama 2 (70B)
1187
+ description: Llama 2 pretrained models are trained on 2 trillion tokens and have double the context length of Llama 1.
1188
+ creator_organization_name: Meta
1189
+ access: open
1190
+ num_parameters: 70000000000
1191
+ release_date: 2023-07-18
1192
+ # TODO(#1828): Upgrade to FULL_FUNCTIONALITY_TEXT_MODEL_TAG
1193
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1194
+
1195
+ - name: meta/llama-3-8b
1196
+ display_name: Llama 3 (8B)
1197
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
1198
+ creator_organization_name: Meta
1199
+ access: open
1200
+ num_parameters: 8000000000
1201
+ release_date: 2024-04-18
1202
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1203
+
1204
+ - name: meta/llama-3-70b
1205
+ display_name: Llama 3 (70B)
1206
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
1207
+ creator_organization_name: Meta
1208
+ access: open
1209
+ num_parameters: 70000000000
1210
+ release_date: 2024-04-18
1211
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1212
+
1213
+
1214
+ # Microsoft/NVIDIA
1215
+ - name: microsoft/TNLGv2_530B
1216
+ display_name: TNLG v2 (530B)
1217
+ description: TNLG v2 (530B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
1218
+ creator_organization_name: Microsoft/NVIDIA
1219
+ access: closed
1220
+ num_parameters: 530000000000
1221
+ release_date: 2022-01-28
1222
+ tags: [] # deprecated text model
1223
+
1224
+ - name: microsoft/TNLGv2_7B
1225
+ display_name: TNLG v2 (6.7B)
1226
+ description: TNLG v2 (6.7B parameters) autoregressive language model trained on a filtered subset of the Pile and CommonCrawl ([paper](https://arxiv.org/pdf/2201.11990.pdf)).
1227
+ creator_organization_name: Microsoft/NVIDIA
1228
+ access: closed
1229
+ num_parameters: 6700000000
1230
+ release_date: 2022-01-28
1231
+ tags: [] # deprecated text model
1232
+
1233
+ - name: microsoft/llava-1.5-7b-hf
1234
+ display_name: LLaVA 1.5 (7B)
1235
+ description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
1236
+ creator_organization_name: Microsoft
1237
+ access: open
1238
+ num_parameters: 7000000000
1239
+ release_date: 2023-10-05
1240
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1241
+
1242
+ - name: microsoft/llava-1.5-13b-hf
1243
+ display_name: LLaVA 1.5 (13B)
1244
+ description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
1245
+ creator_organization_name: Microsoft
1246
+ access: open
1247
+ num_parameters: 13000000000
1248
+ release_date: 2023-10-05
1249
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1250
+
1251
+
1252
+ - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
1253
+ display_name: OpenFlamingo (9B)
1254
+ description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model. ([paper](https://arxiv.org/abs/2308.01390))
1255
+ creator_organization_name: OpenFlamingo
1256
+ access: open
1257
+ num_parameters: 9000000000
1258
+ release_date: 2023-08-02
1259
+ tags: [VISION_LANGUAGE_MODEL_TAG, OPEN_FLAMINGO_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1260
+
1261
+ - name: microsoft/phi-2
1262
+ display_name: Phi-2
1263
+ description: Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value)
1264
+ creator_organization_name: Microsoft
1265
+ access: open
1266
+ num_parameters: 2700000000
1267
+ release_date: 2023-12-12
1268
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1269
+
1270
+
1271
+
1272
+ # 01.AI
1273
+ - name: 01-ai/yi-6b
1274
+ display_name: Yi (6B)
1275
+ description: The Yi models are large language models trained from scratch by developers at 01.AI.
1276
+ creator_organization_name: 01.AI
1277
+ access: open
1278
+ num_parameters: 6000000000
1279
+ release_date: 2023-11-02
1280
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1281
+ - name: 01-ai/yi-34b
1282
+ display_name: Yi (34B)
1283
+ description: The Yi models are large language models trained from scratch by developers at 01.AI.
1284
+ creator_organization_name: 01.AI
1285
+ access: open
1286
+ num_parameters: 34000000000
1287
+ release_date: 2023-11-02
1288
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1289
+
1290
+
1291
+ # Allen Institute for AI
1292
+ # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
1293
+ - name: allenai/olmo-7b
1294
+ display_name: OLMo (7B)
1295
+ description: OLMo is a series of Open Language Models trained on the Dolma dataset.
1296
+ creator_organization_name: Allen Institute for AI
1297
+ access: open
1298
+ num_parameters: 7000000000
1299
+ release_date: 2024-02-01
1300
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1301
+
1302
+ - name: allenai/olmo-7b-twin-2t
1303
+ display_name: OLMo (7B Twin 2T)
1304
+ description: OLMo is a series of Open Language Models trained on the Dolma dataset.
1305
+ creator_organization_name: Allen Institute for AI
1306
+ access: open
1307
+ num_parameters: 7000000000
1308
+ release_date: 2024-02-01
1309
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1310
+
1311
+ - name: allenai/olmo-7b-instruct
1312
+ display_name: OLMo (7B Instruct)
1313
+ description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct version was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
1314
+ creator_organization_name: Allen Institute for AI
1315
+ access: open
1316
+ num_parameters: 7000000000
1317
+ release_date: 2024-02-01
1318
+ # TODO: Add instruct tag.
1319
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1320
+
1321
+
1322
+ # Mistral AI
1323
+ - name: mistralai/mistral-7b-v0.1
1324
+ display_name: Mistral v0.1 (7B)
1325
+ description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
1326
+ creator_organization_name: Mistral AI
1327
+ access: open
1328
+ num_parameters: 7300000000
1329
+ release_date: 2023-09-27
1330
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1331
+
1332
+ - name: mistralai/mixtral-8x7b-32kseqlen
1333
+ display_name: Mixtral (8x7B 32K seqlen)
1334
+ description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
1335
+ creator_organization_name: Mistral AI
1336
+ access: open
1337
+ num_parameters: 46700000000
1338
+ release_date: 2023-12-08
1339
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1340
+
1341
+ - name: mistralai/mixtral-8x7b-instruct-v0.1
1342
+ display_name: Mixtral (8x7B Instruct)
1343
+ description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimization (DPO) for careful instruction following.
1344
+ creator_organization_name: Mistral AI
1345
+ access: open
1346
+ num_parameters: 46700000000
1347
+ # Blog post: https://mistral.ai/news/mixtral-of-experts/
1348
+ release_date: 2023-12-11
1349
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1350
+
1351
+ - name: mistralai/mixtral-8x22b
1352
+ display_name: Mixtral (8x22B)
1353
+ description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1777869263778291896)).
1354
+ creator_organization_name: Mistral AI
1355
+ access: open
1356
+ num_parameters: 176000000000
1357
+ release_date: 2024-04-10
1358
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1359
+
1360
+ - name: mistralai/bakLlava-v1-hf
1361
+ display_name: BakLLaVA v1 (7B)
1362
+ description: BakLLaVA v1 is a Mistral 7B base augmented with the LLaVA 1.5 architecture. ([blog](https://huggingface.co/llava-hf/bakLlava-v1-hf))
1363
+ creator_organization_name: Mistral AI
1364
+ access: open
1365
+ num_parameters: 7000000000
1366
+ release_date: 2023-10-16
1367
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1368
+
1369
+ - name: mistralai/mistral-small-2402
1370
+ display_name: Mistral Small (2402)
1371
+ # TODO: Fill in description
1372
+ description: TBD
1373
+ creator_organization_name: Mistral AI
1374
+ access: limited
1375
+ # Blog post: https://mistral.ai/news/mistral-large/
1376
+ release_date: 2024-02-26
1377
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1378
+
1379
+ - name: mistralai/mistral-medium-2312
1380
+ display_name: Mistral Medium (2312)
1381
+ description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
1382
+ creator_organization_name: Mistral AI
1383
+ access: limited
1384
+ release_date: 2023-12-11
1385
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1386
+
1387
+ - name: mistralai/mistral-large-2402
1388
+ display_name: Mistral Large (2402)
1389
+ # TODO: Fill in description
1390
+ description: TBD
1391
+ creator_organization_name: Mistral AI
1392
+ access: limited
1393
+ # Blog post: https://mistral.ai/news/mistral-large/
1394
+ release_date: 2024-02-26
1395
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1396
+
1397
+
1398
+ # MosaicML
1399
+ - name: mosaicml/mpt-7b
1400
+ display_name: MPT (7B)
1401
+ description: MPT (7B) is a Transformer trained from scratch on 1T tokens of text and code.
1402
+ creator_organization_name: MosaicML
1403
+ access: open
1404
+ num_parameters: 6700000000
1405
+ release_date: 2023-05-05
1406
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1407
+
1408
+ - name: mosaicml/mpt-7b-chat # NOT SUPPORTED
1409
+ display_name: MPT-Chat (7B)
1410
+ description: MPT-Chat (7B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
1411
+ creator_organization_name: MosaicML
1412
+ access: open
1413
+ num_parameters: 6700000000
1414
+ release_date: 2023-05-05
1415
+ tags: [] # TODO: add tags
1416
+
1417
+ - name: mosaicml/mpt-instruct-7b
1418
+ display_name: MPT-Instruct (7B)
1419
+ description: MPT-Instruct (7B) is a model for short-form instruction following. It is built by finetuning MPT (7B), a Transformer trained from scratch on 1T tokens of text and code.
1420
+ creator_organization_name: MosaicML
1421
+ access: open
1422
+ num_parameters: 6700000000
1423
+ release_date: 2023-05-05
1424
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1425
+
1426
+ - name: mosaicml/mpt-30b
1427
+ display_name: MPT (30B)
1428
+ description: MPT (30B) is a Transformer trained from scratch on 1T tokens of text and code.
1429
+ creator_organization_name: MosaicML
1430
+ access: open
1431
+ num_parameters: 30000000000
1432
+ release_date: 2023-06-22
1433
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1434
+
1435
+ - name: mosaicml/mpt-30b-chat # NOT SUPPORTED
1436
+ display_name: MPT-Chat (30B)
1437
+ description: MPT-Chat (30B) is a chatbot-like model for dialogue generation. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
1438
+ creator_organization_name: MosaicML
1439
+ access: open
1440
+ num_parameters: 30000000000
1441
+ release_date: 2023-06-22
1442
+ tags: [] # TODO: add tags
1443
+
1444
+ - name: mosaicml/mpt-instruct-30b
1445
+ display_name: MPT-Instruct (30B)
1446
+ description: MPT-Instruct (30B) is a model for short-form instruction following. It is built by finetuning MPT (30B), a Transformer trained from scratch on 1T tokens of text and code.
1447
+ creator_organization_name: MosaicML
1448
+ access: open
1449
+ num_parameters: 30000000000
1450
+ release_date: 2023-06-22
1451
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1452
+
1453
+
1454
+
1455
+ # Neurips
1456
+ - name: neurips/local
1457
+ display_name: Neurips Local
1458
+ description: Neurips Local
1459
+ creator_organization_name: Neurips
1460
+ access: open
1461
+ release_date: 2023-06-01
1462
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1463
+
1464
+
1465
+
1466
+ # NVIDIA
1467
+ - name: nvidia/megatron-gpt2
1468
+ display_name: Megatron GPT2
1469
+ description: GPT-2 implemented in Megatron-LM ([paper](https://arxiv.org/abs/1909.08053)).
1470
+ creator_organization_name: NVIDIA
1471
+ access: open
1472
+ release_date: 2019-09-17 # paper date
1473
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
1474
+
1475
+
1476
+
1477
+ # OpenAI
1478
+
1479
+ ## GPT 2 Models
1480
+ # Not served by OpenAI, instead served by HuggingFace.
1481
+
1482
+ - name: openai/gpt2
1483
+ display_name: GPT-2 (1.5B)
1484
+ description: GPT-2 (1.5B parameters) is a transformer model trained on a large corpus of English text in a self-supervised fashion ([paper](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)).
1485
+ creator_organization_name: OpenAI
1486
+ access: open
1487
+ num_parameters: 1500000000
1488
+ release_date: 2019-02-14
1489
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1490
+
1491
+
1492
+ ## GPT 3 Models
1493
+ # The list of models can be found here: https://beta.openai.com/docs/engines/gpt-3
1494
+
1495
+ - name: openai/davinci-002
1496
+ display_name: davinci-002
1497
+ description: Replacement for the GPT-3 curie and davinci base models.
1498
+ creator_organization_name: OpenAI
1499
+ access: limited
1500
+ release_date: 2023-08-22
1501
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1502
+
1503
+ - name: openai/babbage-002
1504
+ display_name: babbage-002
1505
+ description: Replacement for the GPT-3 ada and babbage base models.
1506
+ creator_organization_name: OpenAI
1507
+ access: limited
1508
+ release_date: 2023-08-22
1509
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1510
+
1511
+ # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
+
+ - name: openai/davinci # DEPRECATED
+ display_name: davinci (175B)
+ description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 175000000000
+ release_date: 2020-05-28
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/curie # DEPRECATED
+ display_name: curie (6.7B)
+ description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 6700000000
+ release_date: 2020-05-28
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/babbage # DEPRECATED
+ display_name: babbage (1.3B)
+ description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 1300000000
+ release_date: 2020-05-28
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/ada # DEPRECATED
+ display_name: ada (350M)
+ description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 350000000
+ release_date: 2020-05-28
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-davinci-003 # DEPRECATED
+ display_name: GPT-3.5 (text-davinci-003)
+ description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 175000000000
+ release_date: 2022-11-28
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # TODO: text-davinci-002 supports insertion. Support insertion in our framework.
+ # https://github.com/stanford-crfm/benchmarking/issues/359
+ - name: openai/text-davinci-002 # DEPRECATED
+ display_name: GPT-3.5 (text-davinci-002)
+ description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 175000000000
+ release_date: 2022-01-27
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-davinci-001 # DEPRECATED
+ display_name: GPT-3.5 (text-davinci-001)
+ description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 175000000000
+ release_date: 2022-01-27
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-curie-001 # DEPRECATED
+ display_name: text-curie-001
+ description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 6700000000
+ release_date: 2022-01-27
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-babbage-001 # DEPRECATED
+ display_name: text-babbage-001
+ description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 1300000000
+ release_date: 2022-01-27
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: openai/text-ada-001 # DEPRECATED
+ display_name: text-ada-001
+ description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 350000000
+ release_date: 2022-01-27
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+ ## GPT 3.5 Turbo Models
+ # ChatGPT: https://openai.com/blog/chatgpt
+
+ - name: openai/gpt-3.5-turbo-instruct
+ display_name: GPT-3.5 Turbo Instruct
+ description: Similar capabilities as GPT-3 era models. Compatible with legacy Completions endpoint and not Chat Completions.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-09-18
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-0301
+ display_name: GPT-3.5 Turbo (0301)
+ description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-03-01
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-0613
+ display_name: GPT-3.5 Turbo (0613)
+ description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-06-13
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-1106
+ display_name: GPT-3.5 Turbo (1106)
+ description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-11-06.
+ creator_organization_name: OpenAI
+ access: limited
+ # Actual release blog post was published on 2024-01-25:
+ # https://openai.com/blog/new-embedding-models-and-api-updates
+ release_date: 2024-01-25
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-0125
+ display_name: gpt-3.5-turbo-0125
+ description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
+ creator_organization_name: OpenAI
+ access: limited
+ # Release blog post was published on 2024-01-25:
+ # https://openai.com/blog/new-embedding-models-and-api-updates
+ # The actual release date is unclear - it was described as "next week".
+ release_date: 2024-01-25
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-3.5-turbo-16k-0613
+ display_name: gpt-3.5-turbo-16k-0613
+ description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-06-13 with a longer context length of 16,384 tokens.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-06-13
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+ ## GPT 4 Models
+
+ - name: openai/gpt-4-1106-preview
+ display_name: GPT-4 Turbo (1106 preview)
+ description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2023-11-06.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-11-06
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-0314
+ display_name: GPT-4 (0314)
+ description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-03-14.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-03-14
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-32k-0314
+ display_name: gpt-4-32k-0314
+ description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-03-14.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-03-14
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-0613
+ display_name: GPT-4 (0613)
+ description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 from 2023-06-13.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-06-13
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-32k-0613
+ display_name: gpt-4-32k-0613
+ description: GPT-4 is a large multimodal model (currently only accepting text inputs and emitting text outputs) that is optimized for chat but works well for traditional completions tasks. Snapshot of gpt-4 with a longer context length of 32,768 tokens from 2023-06-13.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-06-13
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-0125-preview
+ display_name: GPT-4 Turbo (0125 preview)
+ description: GPT-4 Turbo (preview) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Preview snapshot from 2024-01-25. This snapshot is intended to reduce cases of “laziness” where the model doesn’t complete a task.
+ creator_organization_name: OpenAI
+ access: limited
+ # Actual release blog post was published on 2024-01-25:
+ # https://openai.com/blog/new-embedding-models-and-api-updates
+ release_date: 2024-01-25
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-turbo-2024-04-09
+ display_name: GPT-4 Turbo (2024-04-09)
+ description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2024-04-09
+ tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: openai/gpt-4-vision-preview
+ display_name: GPT-4V (preview)
+ description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat but works well for traditional completions tasks.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-11-06
+ tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ ## Codex Models
+ # DEPRECATED: Codex models have been shut down on March 23 2023.
+
+ - name: openai/code-davinci-002 # DEPRECATED
+ display_name: code-davinci-002
+ description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)).
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2021-07-01 # TODO: Find correct date (this is for v1)
+ tags: [CODE_MODEL_TAG]
+
+ - name: openai/code-davinci-001 # DEPRECATED
+ display_name: code-davinci-001
+ description: code-davinci-001 model
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2021-07-01 # Paper date
+ tags: [CODE_MODEL_TAG]
+
+ - name: openai/code-cushman-001 # DEPRECATED
+ display_name: code-cushman-001 (12B)
+ description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 12000000000
+ release_date: 2021-07-01 # Paper date
+ tags: [CODE_MODEL_TAG]
+
+
+ ## Text Similarity Models
+ # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
+ # The number of parameters is guessed based on the number of parameters of the
+ # corresponding GPT-3 model.
+ # DEPRECATED: Announced on July 06 2023 that first generation embeddings models
+ # will be shut down on January 04 2024.
+
+ - name: openai/text-similarity-davinci-001 # DEPRECATED
+ display_name: text-similarity-davinci-001
+ description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 175000000000
+ release_date: 2022-01-25 # Blog post date
+ tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-similarity-curie-001 # DEPRECATED
+ display_name: text-similarity-curie-001
+ description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 6700000000
+ release_date: 2022-01-25 # Blog post date
+ tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-similarity-babbage-001 # DEPRECATED
+ display_name: text-similarity-babbage-001
+ description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 1300000000
+ release_date: 2022-01-25 # Blog post date
+ tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-similarity-ada-001 # DEPRECATED
+ display_name: text-similarity-ada-001
+ description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 350000000
+ release_date: 2022-01-25 # Blog post date
+ tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ - name: openai/text-embedding-ada-002
+ display_name: text-embedding-ada-002
+ description: An improved embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/new-and-improved-embedding-model)).
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2022-12-15 # Blog post date
+ tags: [TEXT_SIMILARITY_MODEL_TAG]
+
+ # Text-to-image models
+ - name: openai/dall-e-2
+ display_name: DALL-E 2 (3.5B)
+ description: DALL-E 2 is an encoder-decoder-based latent diffusion model trained on large-scale paired text-image datasets. The model is available via the OpenAI API ([paper](https://arxiv.org/abs/2204.06125)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 3500000000
+ release_date: 2022-04-13
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3
+ display_name: DALL-E 3
+ description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 0
+ release_date: 2023-11-06
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3-natural
+ display_name: DALL-E 3 (natural style)
+ description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 0
+ release_date: 2023-11-06
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3-hd
+ display_name: DALL-E 3 HD
+ description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The default style, vivid, causes the model to lean towards generating hyper-real and dramatic images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 0
+ release_date: 2023-11-06
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: openai/dall-e-3-hd-natural
+ display_name: DALL-E 3 HD (natural style)
+ description: DALL-E 3 is a text-to-image generation model built natively on ChatGPT, which is used to automatically engineer prompts. The HD version creates images with finer details and greater consistency across the image, but generation is slower. The natural style causes the model to produce more natural, less hyper-real looking images. The model is available via the OpenAI API ([paper](https://cdn.openai.com/papers/dall-e-3.pdf)).
+ creator_organization_name: OpenAI
+ access: limited
+ num_parameters: 0
+ release_date: 2023-11-06
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ # Qwen
+
+ - name: qwen/qwen-7b
+ display_name: Qwen
+ description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-7b
+ display_name: Qwen1.5 (7B)
+ description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-14b
+ display_name: Qwen1.5 (14B)
+ description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-14B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-32b
+ display_name: Qwen1.5 (32B)
+ description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-32B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen1.5-72b
+ display_name: Qwen1.5 (72B)
+ description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen1.5-72B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: qwen/qwen-vl
+ display_name: Qwen-VL
+ description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
+ creator_organization_name: Alibaba Cloud
+ access: open
+ release_date: 2023-08-24
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ - name: qwen/qwen-vl-chat
+ display_name: Qwen-VL Chat
+ description: Chat version of the visual multimodal model Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
+ creator_organization_name: Alibaba Cloud
+ access: open
+ release_date: 2023-08-24
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ # Salesforce
+ - name: salesforce/codegen # NOT SUPPORTED
+ display_name: CodeGen (16B)
+ description: CodeGen (16B parameters) is an open dense code model trained for multi-turn program synthesis ([paper](https://arxiv.org/pdf/2203.13474.pdf)).
+ creator_organization_name: Salesforce
+ access: open
+ num_parameters: 16000000000
+ release_date: 2022-03-25
+ tags: [] # TODO: add tags
+
+
+
+ # Stability AI
+ - name: stabilityai/stablelm-base-alpha-3b
+ display_name: StableLM-Base-Alpha (3B)
+ description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models.
+ creator_organization_name: Stability AI
+ access: open
+ num_parameters: 3000000000
+ release_date: 2023-04-20
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: stabilityai/stablelm-base-alpha-7b
+ display_name: StableLM-Base-Alpha (7B)
+ description: StableLM-Base-Alpha is a suite of 3B and 7B parameter decoder-only language models pre-trained on a diverse collection of English datasets with a sequence length of 4096 to push beyond the context window limitations of existing open-source language models.
+ creator_organization_name: Stability AI
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-04-20
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Stanford
+ - name: stanford/alpaca-7b
+ display_name: Alpaca (7B)
+ description: Alpaca 7B is a model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations.
+ creator_organization_name: Stanford
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-03-13
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+
+ # TII UAE
+ - name: tiiuae/falcon-7b
+ display_name: Falcon (7B)
+ description: Falcon-7B is a 7B parameters causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-03-15
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: tiiuae/falcon-7b-instruct
+ display_name: Falcon-Instruct (7B)
+ description: Falcon-7B-Instruct is a 7B parameters causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-03-15
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: tiiuae/falcon-40b
+ display_name: Falcon (40B)
+ description: Falcon-40B is a 40B parameters causal decoder-only model built by TII and trained on 1,500B tokens of RefinedWeb enhanced with curated corpora.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 40000000000
+ release_date: 2023-05-25
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: tiiuae/falcon-40b-instruct
+ display_name: Falcon-Instruct (40B)
+ description: Falcon-40B-Instruct is a 40B parameters causal decoder-only model built by TII based on Falcon-40B and finetuned on a mixture of chat/instruct datasets.
+ creator_organization_name: TII UAE
+ access: open
+ num_parameters: 40000000000
+ release_date: 2023-05-25
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Together
+ - name: together/gpt-jt-6b-v1
+ display_name: GPT-JT (6B)
+ description: GPT-JT (6B parameters) is a fork of GPT-J ([blog post](https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai)).
+ creator_organization_name: Together
+ access: open
+ num_parameters: 6700000000
+ release_date: 2022-11-29
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/gpt-neoxt-chat-base-20b
+ display_name: GPT-NeoXT-Chat-Base (20B)
+ description: GPT-NeoXT-Chat-Base (20B) is fine-tuned from GPT-NeoX, serving as a base model for developing open-source chatbots.
+ creator_organization_name: Together
+ access: open
+ num_parameters: 20000000000
+ release_date: 2023-03-08
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG]
+
+ - name: together/redpajama-incite-base-3b-v1
+ display_name: RedPajama-INCITE-Base-v1 (3B)
+ description: RedPajama-INCITE-Base-v1 (3B parameters) is a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+ creator_organization_name: Together
+ access: open
+ num_parameters: 3000000000
+ release_date: 2023-05-05
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/redpajama-incite-instruct-3b-v1
+ display_name: RedPajama-INCITE-Instruct-v1 (3B)
+ description: RedPajama-INCITE-Instruct-v1 (3B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+ creator_organization_name: Together
+ access: open
+ num_parameters: 3000000000
+ release_date: 2023-05-05
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/redpajama-incite-chat-3b-v1 # NOT SUPPORTED
+ display_name: RedPajama-INCITE-Chat-v1 (3B)
+ description: RedPajama-INCITE-Chat-v1 (3B parameters) is a model fine-tuned on OASST1 and Dolly2 to enhance chatting ability. It is built from RedPajama-INCITE-Base-v1 (3B), a 3 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+ creator_organization_name: Together
+ access: open
+ num_parameters: 3000000000
+ release_date: 2023-05-05
+ tags: [] # TODO: add tags
+
+ - name: together/redpajama-incite-base-7b
+ display_name: RedPajama-INCITE-Base (7B)
+ description: RedPajama-INCITE-Base (7B parameters) is a 7 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+ creator_organization_name: Together
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-05-05
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: together/redpajama-incite-instruct-7b
+ display_name: RedPajama-INCITE-Instruct (7B)
+ description: RedPajama-INCITE-Instruct (7B parameters) is a model fine-tuned for few-shot applications on the data of GPT-JT. It is built from RedPajama-INCITE-Base (7B), a 7 billion parameter base model that aims to replicate the LLaMA recipe as closely as possible.
+ creator_organization_name: Together
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-05-05
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Tsinghua
+
+ - name: thudm/cogview2
+ display_name: CogView2 (6B)
+ description: CogView2 is a hierarchical transformer (6B-9B-9B parameters) for text-to-image generation that supports both English and Chinese input text ([paper](https://arxiv.org/abs/2105.13290)).
+ creator_organization_name: Tsinghua
+ access: open
+ num_parameters: 6000000000
+ release_date: 2022-06-15
+ tags: [TEXT_TO_IMAGE_MODEL_TAG]
+
+ - name: tsinghua/glm
+ display_name: GLM (130B)
+ description: GLM (130B parameters) is an open bilingual (English & Chinese) bidirectional dense model that was trained using General Language Model (GLM) procedure ([paper](https://arxiv.org/pdf/2210.02414.pdf)).
+ creator_organization_name: Tsinghua
+ access: open
+ num_parameters: 130000000000
+ release_date: 2022-08-04
+ # Inference with echo=True is not feasible -- in the prompt encoding phase, they use
+ # bidirectional attention and do not perform predictions on them.
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
+
+ - name: tsinghua/codegeex # NOT SUPPORTED
+ display_name: CodeGeeX (13B)
+ description: CodeGeeX (13B parameters) is an open dense code model trained on more than 20 programming languages on a corpus of more than 850B tokens ([blog](http://keg.cs.tsinghua.edu.cn/codegeex/)).
+ creator_organization_name: Tsinghua
+ access: open
+ num_parameters: 13000000000
+ release_date: 2022-09-19
+ tags: [] # TODO: add tags
+
+
+
+ # Writer
+ - name: writer/palmyra-base
+ display_name: Palmyra Base (5B)
+ description: Palmyra Base (5B)
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 5000000000
+ release_date: 2022-10-13
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-large
+ display_name: Palmyra Large (20B)
+ description: Palmyra Large (20B)
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 20000000000
+ release_date: 2022-12-23
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-instruct-30
+ deprecated: true # Internal error
+ display_name: InstructPalmyra (30B)
+ description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans.
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 30000000000
+ release_date: 2023-02-16
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-e
+ deprecated: true # Internal error
+ display_name: Palmyra E (30B)
+ description: Palmyra E (30B)
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 30000000000
+ release_date: 2023-03-03
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/silk-road
+ display_name: Silk Road (35B)
+ description: Silk Road (35B)
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 35000000000
+ release_date: 2023-04-13
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x
+ display_name: Palmyra X (43B)
+ description: Palmyra-X (43B parameters) is trained to adhere to instructions using human feedback and utilizes a technique called multiquery attention. Furthermore, a new feature called 'self-instruct' has been introduced, which includes the implementation of an early stopping criterion specifically designed for minimal instruction tuning ([paper](https://dev.writer.com/docs/becoming-self-instruct-introducing-early-stopping-criteria-for-minimal-instruct-tuning)).
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 43000000000
+ release_date: 2023-06-11
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x-v2
+ display_name: Palmyra X V2 (33B)
+ description: Palmyra-X V2 (33B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. The pre-training data comprises more than 2 trillion tokens of diverse types covering a wide range of areas, and training used FlashAttention-2.
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 33000000000
+ release_date: 2023-12-01
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x-v3
+ display_name: Palmyra X V3 (72B)
+ description: Palmyra-X V3 (72B parameters) is a Transformer-based model, which is trained on extremely large-scale pre-training data. It is trained via unsupervised learning and DPO and uses multiquery attention.
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 72000000000
+ release_date: 2023-12-01
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: writer/palmyra-x-32k
+ display_name: Palmyra X-32K (33B)
+ description: Palmyra-X-32K (33B parameters) is a Transformer-based model, which is trained on large-scale pre-training data. The pre-training data types are diverse and cover a wide range of areas. These data types are used in conjunction with an alignment mechanism to extend the context window.
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 33000000000
+ release_date: 2023-12-01
+ # Does not support echo
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
+
+ # Yandex
+ - name: yandex/yalm
+ display_name: YaLM (100B)
+ description: YaLM (100B parameters) is an autoregressive language model trained on English and Russian text ([GitHub](https://github.com/yandex/YaLM-100B)).
+ creator_organization_name: Yandex
+ access: open
+ num_parameters: 100000000000
+ release_date: 2022-06-23
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
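For readers scanning these entries, a minimal sketch of how records of this shape can be filtered by tag with PyYAML. The inline snippet mirrors the yandex/yalm entry above; the flat list-of-entries layout and the availability of PyYAML are assumptions, not a description of how the package itself loads this file.

```python
# Minimal sketch: filter model-metadata entries of the shape shown above by tag.
# Assumes PyYAML is installed; the top-level-list layout is an assumption for
# illustration, not the exact structure of the packaged configuration.
import yaml

SNIPPET = """
- name: yandex/yalm
  display_name: YaLM (100B)
  creator_organization_name: Yandex
  access: open
  num_parameters: 100000000000
  release_date: 2022-06-23
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
"""

def models_with_tag(entries, tag):
    """Return names of entries whose tag list contains the given tag."""
    return [entry["name"] for entry in entries if tag in entry.get("tags", [])]

entries = yaml.safe_load(SNIPPET)
print(models_with_tag(entries, "TEXT_MODEL_TAG"))  # ['yandex/yalm']
```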