crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crfm-helm might be problematic.

Files changed (236)
  1. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
  2. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
  3. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +32 -31
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  6. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  7. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  8. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  9. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  10. helm/benchmark/annotation/air_bench_annotator.py +64 -0
  11. helm/benchmark/annotation/annotator_factory.py +6 -0
  12. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  13. helm/benchmark/annotation/call_center_annotator.py +247 -0
  14. helm/benchmark/annotation/financebench_annotator.py +79 -0
  15. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  16. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  17. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  18. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  19. helm/benchmark/annotation/live_qa_annotator.py +71 -0
  20. helm/benchmark/annotation/medication_qa_annotator.py +68 -0
  21. helm/benchmark/annotation/model_as_judge.py +45 -0
  22. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  23. helm/benchmark/annotation/xstest_annotator.py +110 -0
  24. helm/benchmark/augmentations/translate_perturbation.py +1 -0
  25. helm/benchmark/huggingface_registration.py +16 -6
  26. helm/benchmark/metrics/air_bench_metrics.py +56 -0
  27. helm/benchmark/metrics/annotation_metrics.py +108 -0
  28. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  29. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  30. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  31. helm/benchmark/metrics/fin_qa_metrics.py +60 -0
  32. helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
  33. helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
  34. helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
  35. helm/benchmark/metrics/live_qa_metrics.py +23 -0
  36. helm/benchmark/metrics/medication_qa_metrics.py +23 -0
  37. helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
  38. helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
  39. helm/benchmark/metrics/safety_metrics.py +57 -0
  40. helm/benchmark/metrics/summac/model_summac.py +3 -3
  41. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  42. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  43. helm/benchmark/metrics/unitxt_metrics.py +20 -10
  44. helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
  45. helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
  46. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  47. helm/benchmark/model_metadata_registry.py +3 -3
  48. helm/benchmark/presentation/schema.py +54 -4
  49. helm/benchmark/presentation/test_run_entry.py +1 -0
  50. helm/benchmark/presentation/test_schema.py +11 -0
  51. helm/benchmark/run.py +31 -2
  52. helm/benchmark/run_expander.py +113 -10
  53. helm/benchmark/run_spec_factory.py +4 -0
  54. helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
  55. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  56. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  57. helm/benchmark/run_specs/classic_run_specs.py +15 -11
  58. helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
  59. helm/benchmark/run_specs/experimental_run_specs.py +85 -0
  60. helm/benchmark/run_specs/finance_run_specs.py +110 -0
  61. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  62. helm/benchmark/run_specs/vlm_run_specs.py +251 -57
  63. helm/benchmark/scenarios/air_bench_scenario.py +50 -0
  64. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  65. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  66. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  67. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  68. helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
  69. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  70. helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
  71. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  72. helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
  73. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  74. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  75. helm/benchmark/scenarios/scenario.py +1 -1
  76. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  77. helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
  78. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  79. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  80. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  81. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  82. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  83. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  84. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  85. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  86. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  87. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  88. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  89. helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
  90. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  91. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  92. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  93. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  94. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  95. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  96. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  97. helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
  98. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
  99. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
  100. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  101. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  102. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  103. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
  104. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  105. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  106. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  107. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  108. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  109. helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
  110. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  111. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  112. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  113. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
  114. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
  115. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  116. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  117. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  118. helm/benchmark/server.py +1 -6
  119. helm/benchmark/static/schema_air_bench.yaml +3149 -0
  120. helm/benchmark/static/schema_bhasa.yaml +709 -0
  121. helm/benchmark/static/schema_call_center.yaml +232 -0
  122. helm/benchmark/static/schema_classic.yaml +3 -59
  123. helm/benchmark/static/schema_cleva.yaml +768 -0
  124. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  125. helm/benchmark/static/schema_ewok.yaml +367 -0
  126. helm/benchmark/static/schema_finance.yaml +189 -0
  127. helm/benchmark/static/schema_image2struct.yaml +588 -0
  128. helm/benchmark/static/schema_instruction_following.yaml +3 -52
  129. helm/benchmark/static/schema_lite.yaml +3 -61
  130. helm/benchmark/static/schema_medical.yaml +255 -0
  131. helm/benchmark/static/schema_mmlu.yaml +3 -61
  132. helm/benchmark/static/schema_safety.yaml +247 -0
  133. helm/benchmark/static/schema_tables.yaml +317 -0
  134. helm/benchmark/static/schema_thai.yaml +244 -0
  135. helm/benchmark/static/schema_unitxt.yaml +3 -61
  136. helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
  137. helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
  138. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  139. helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
  140. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  141. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  142. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  143. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  144. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  145. helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
  146. helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
  147. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  148. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  149. helm/benchmark/static_build/index.html +2 -2
  150. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  151. helm/clients/ai21_client.py +71 -1
  152. helm/clients/anthropic_client.py +50 -28
  153. helm/clients/auto_client.py +11 -0
  154. helm/clients/client.py +24 -7
  155. helm/clients/cohere_client.py +98 -3
  156. helm/clients/huggingface_client.py +79 -19
  157. helm/clients/nvidia_nim_client.py +35 -0
  158. helm/clients/openai_client.py +11 -5
  159. helm/clients/palmyra_client.py +25 -0
  160. helm/clients/perspective_api_client.py +11 -6
  161. helm/clients/reka_client.py +189 -0
  162. helm/clients/test_client.py +7 -9
  163. helm/clients/test_huggingface_client.py +19 -3
  164. helm/clients/test_together_client.py +72 -2
  165. helm/clients/together_client.py +129 -23
  166. helm/clients/vertexai_client.py +62 -18
  167. helm/clients/vision_language/huggingface_vlm_client.py +1 -0
  168. helm/clients/vision_language/open_flamingo_client.py +1 -2
  169. helm/clients/vision_language/paligemma_client.py +146 -0
  170. helm/clients/vision_language/palmyra_vision_client.py +99 -0
  171. helm/clients/yi_client.py +31 -0
  172. helm/common/critique_request.py +10 -1
  173. helm/common/images_utils.py +25 -0
  174. helm/common/mongo_key_value_store.py +2 -1
  175. helm/common/request.py +16 -0
  176. helm/config/model_deployments.yaml +740 -363
  177. helm/config/model_metadata.yaml +824 -128
  178. helm/config/tokenizer_configs.yaml +207 -10
  179. helm/proxy/critique/model_critique_client.py +32 -4
  180. helm/proxy/example_queries.py +14 -21
  181. helm/proxy/services/server_service.py +2 -3
  182. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  183. helm/tokenizers/ai21_tokenizer.py +51 -59
  184. helm/tokenizers/auto_tokenizer.py +1 -1
  185. helm/tokenizers/cohere_tokenizer.py +29 -62
  186. helm/tokenizers/huggingface_tokenizer.py +35 -13
  187. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  188. helm/tokenizers/test_cohere_tokenizer.py +39 -0
  189. helm/tokenizers/test_huggingface_tokenizer.py +5 -1
  190. helm/benchmark/static/benchmarking.css +0 -156
  191. helm/benchmark/static/benchmarking.js +0 -1705
  192. helm/benchmark/static/config.js +0 -3
  193. helm/benchmark/static/general.js +0 -122
  194. helm/benchmark/static/images/crfm-logo.png +0 -0
  195. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  196. helm/benchmark/static/images/helm-logo.png +0 -0
  197. helm/benchmark/static/images/language-model-helm.png +0 -0
  198. helm/benchmark/static/images/organizations/ai21.png +0 -0
  199. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  200. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  201. helm/benchmark/static/images/organizations/cohere.png +0 -0
  202. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  203. helm/benchmark/static/images/organizations/google.png +0 -0
  204. helm/benchmark/static/images/organizations/meta.png +0 -0
  205. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  206. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  207. helm/benchmark/static/images/organizations/openai.png +0 -0
  208. helm/benchmark/static/images/organizations/together.png +0 -0
  209. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  210. helm/benchmark/static/images/organizations/yandex.png +0 -0
  211. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  212. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  213. helm/benchmark/static/index.html +0 -68
  214. helm/benchmark/static/info-icon.png +0 -0
  215. helm/benchmark/static/json-urls.js +0 -69
  216. helm/benchmark/static/plot-captions.js +0 -27
  217. helm/benchmark/static/schema_image2structure.yaml +0 -304
  218. helm/benchmark/static/utils.js +0 -285
  219. helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
  220. helm/benchmark/static_build/assets/index-878a1094.css +0 -1
  221. helm/benchmark/window_services/ai21_window_service.py +0 -247
  222. helm/benchmark/window_services/cohere_window_service.py +0 -101
  223. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  224. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  225. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  226. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  227. helm/tokenizers/ice_tokenizer.py +0 -30
  228. helm/tokenizers/test_ice_tokenizer.py +0 -57
  229. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  230. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  231. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  232. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  233. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  234. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  235. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  236. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -31,50 +31,41 @@ models:
31
31
 
32
32
 
33
33
  # AI21 Labs
34
- - name: ai21/j1-jumbo # DEPRECATED
34
+ - name: ai21/j1-jumbo
35
35
  display_name: J1-Jumbo v1 (178B)
36
36
  description: Jurassic-1 Jumbo (178B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
37
37
  creator_organization_name: AI21 Labs
38
38
  access: limited
39
39
  num_parameters: 178000000000
40
40
  release_date: 2021-08-11
41
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
41
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
42
42
 
43
- - name: ai21/j1-large # DEPRECATED
43
+ - name: ai21/j1-large
44
44
  display_name: J1-Large v1 (7.5B)
45
45
  description: Jurassic-1 Large (7.5B parameters) ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
46
46
  creator_organization_name: AI21 Labs
47
47
  access: limited
48
48
  num_parameters: 7500000000
49
49
  release_date: 2021-08-11
50
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
50
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
51
51
 
52
- - name: ai21/j1-grande # DEPRECATED
52
+ - name: ai21/j1-grande
53
53
  display_name: J1-Grande v1 (17B)
54
54
  description: Jurassic-1 Grande (17B parameters) with a "few tweaks" to the training process ([docs](https://studio.ai21.com/docs/jurassic1-language-models/), [tech report](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf)).
55
55
  creator_organization_name: AI21 Labs
56
56
  access: limited
57
57
  num_parameters: 17000000000
58
58
  release_date: 2022-05-03
59
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
59
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
60
60
 
61
- - name: ai21/j1-grande-v2-beta # DEPRECATED
61
+ - name: ai21/j1-grande-v2-beta
62
62
  display_name: J1-Grande v2 beta (17B)
63
63
  description: Jurassic-1 Grande v2 beta (17B parameters)
64
64
  creator_organization_name: AI21 Labs
65
65
  access: limited
66
66
  num_parameters: 17000000000
67
67
  release_date: 2022-10-28
68
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
69
-
70
- - name: ai21/j2-jumbo
71
- display_name: Jurassic-2 Jumbo (178B)
72
- description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
73
- creator_organization_name: AI21 Labs
74
- access: limited
75
- num_parameters: 178000000000
76
- release_date: 2023-03-09
77
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
68
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
78
69
 
79
70
  - name: ai21/j2-large
80
71
  display_name: Jurassic-2 Large (7.5B)
@@ -83,7 +74,7 @@ models:
83
74
  access: limited
84
75
  num_parameters: 7500000000
85
76
  release_date: 2023-03-09
86
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
77
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
87
78
 
88
79
  - name: ai21/j2-grande
89
80
  display_name: Jurassic-2 Grande (17B)
@@ -92,13 +83,67 @@ models:
92
83
  access: limited
93
84
  num_parameters: 17000000000
94
85
  release_date: 2023-03-09
95
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
86
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
87
+
88
+ - name: ai21/j2-jumbo
89
+ display_name: Jurassic-2 Jumbo (178B)
90
+ description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
91
+ creator_organization_name: AI21 Labs
92
+ access: limited
93
+ num_parameters: 178000000000
94
+ release_date: 2023-03-09
95
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
96
96
 
97
97
  # TODO(1524): Change AI21 model names
98
98
  # - j2-jumbo -> j2-ultra
99
99
  # - j2-grande -> j2-mid
100
100
  # - j2-large -> j2-light
101
101
 
102
+ - name: ai21/jamba-instruct
103
+ display_name: Jamba Instruct
104
+ description: Jamba Instruct is an instruction tuned version of Jamba, which uses a hybrid Transformer-Mamba mixture-of-experts (MoE) architecture that interleaves blocks of Transformer and Mamba layers. ([blog](https://www.ai21.com/blog/announcing-jamba-instruct))
105
+ creator_organization_name: AI21 Labs
106
+ access: limited
107
+ num_parameters: 52000000000
108
+ release_date: 2024-05-02
109
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
110
+
111
+ - name: ai21/jamba-1.5-mini
112
+ display_name: Jamba 1.5 Mini
113
+ description: Jamba 1.5 Mini is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family))
114
+ creator_organization_name: AI21 Labs
115
+ access: open
116
+ num_parameters: 51600000000
117
+ release_date: 2024-08-22
118
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
119
+
120
+ - name: ai21/jamba-1.5-large
121
+ display_name: Jamba 1.5 Large
122
+ description: Jamba 1.5 Large is a long-context, hybrid SSM-Transformer instruction following foundation model that is optimized for function calling, structured output, and grounded generation. ([blog](https://www.ai21.com/blog/announcing-jamba-model-family))
123
+ creator_organization_name: AI21 Labs
124
+ access: open
125
+ num_parameters: 399000000000
126
+ release_date: 2024-08-22
127
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
128
+
129
+ # AI Singapore
130
+ - name: aisingapore/sea-lion-7b
131
+ display_name: SEA-LION (7B)
132
+ description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
133
+ creator_organization_name: AI Singapore
134
+ access: open
135
+ num_parameters: 7000000000
136
+ release_date: 2023-02-24
137
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
138
+
139
+ - name: aisingapore/sea-lion-7b-instruct
140
+ display_name: SEA-LION Instruct (7B)
141
+ description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
142
+ creator_organization_name: AI Singapore
143
+ access: open
144
+ num_parameters: 7000000000
145
+ release_date: 2023-02-24
146
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
102
147
 
103
148
 
104
149
  # Aleph Alpha
@@ -253,7 +298,14 @@ models:
253
298
  release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
254
299
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
255
300
 
256
- # DEPRECATED: Please do not use.
301
+ - name: anthropic/claude-3-5-sonnet-20240620
302
+ display_name: Claude 3.5 Sonnet (20240620)
303
+ description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
304
+ creator_organization_name: Anthropic
305
+ access: limited
306
+ release_date: 2024-06-20
307
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
308
+
257
309
  - name: anthropic/stanford-online-all-v4-s3
258
310
  display_name: Anthropic-LM v4-s3 (52B)
259
311
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -261,7 +313,7 @@ models:
261
313
  access: closed
262
314
  num_parameters: 52000000000
263
315
  release_date: 2021-12-01
264
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
316
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
265
317
 
266
318
 
267
319
 
@@ -327,6 +379,18 @@ models:
327
379
  release_date: 2023-05-09 # ArXiv submission date
328
380
  tags: [CODE_MODEL_TAG]
329
381
 
382
+ # BioMistral
383
+
384
+ - name: biomistral/biomistral-7b
385
+ display_name: BioMistral (7B)
386
+ description: BioMistral 7B is an open-source LLM tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.
387
+ creator_organization_name: BioMistral
388
+ access: open
389
+ num_parameters: 7300000000
390
+ release_date: 2024-02-15
391
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
392
+
393
+
330
394
 
331
395
 
332
396
  # Cerebras Systems
@@ -370,16 +434,16 @@ models:
370
434
  access: limited
371
435
  num_parameters: 52400000000
372
436
  release_date: 2022-06-09
373
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
437
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
374
438
 
375
- - name: cohere/large-20220720 # DEPRECATED
439
+ - name: cohere/large-20220720
376
440
  display_name: Cohere large v20220720 (13.1B)
377
441
  description: Cohere large v20220720 (13.1B parameters), which is deprecated by Cohere as of December 2, 2022.
378
442
  creator_organization_name: Cohere
379
443
  access: limited
380
444
  num_parameters: 13100000000
381
445
  release_date: 2022-07-20
382
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
446
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
383
447
 
384
448
  - name: cohere/medium-20220720
385
449
  display_name: Cohere medium v20220720 (6.1B)
@@ -388,16 +452,16 @@ models:
388
452
  access: limited
389
453
  num_parameters: 6100000000
390
454
  release_date: 2022-07-20
391
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
455
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
392
456
 
393
- - name: cohere/small-20220720 # DEPRECATED
457
+ - name: cohere/small-20220720
394
458
  display_name: Cohere small v20220720 (410M)
395
459
  description: Cohere small v20220720 (410M parameters), which is deprecated by Cohere as of December 2, 2022.
396
460
  creator_organization_name: Cohere
397
461
  access: limited
398
462
  num_parameters: 410000000
399
463
  release_date: 2022-07-20
400
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
464
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
401
465
 
402
466
  - name: cohere/xlarge-20221108
403
467
  display_name: Cohere xlarge v20221108 (52.4B)
@@ -406,37 +470,37 @@ models:
406
470
  access: limited
407
471
  num_parameters: 52400000000
408
472
  release_date: 2022-11-08
409
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
473
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
410
474
 
411
- - name: cohere/medium-20221108 # DEPRECATED
475
+ - name: cohere/medium-20221108
412
476
  display_name: Cohere medium v20221108 (6.1B)
413
477
  description: Cohere medium v20221108 (6.1B parameters)
414
478
  creator_organization_name: Cohere
415
479
  access: limited
416
480
  num_parameters: 6100000000
417
481
  release_date: 2022-11-08
418
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
482
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
419
483
 
420
- - name: cohere/command-medium-beta # DEPRECATED
421
- display_name: Cohere Command beta (6.1B)
422
- description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
484
+ - name: cohere/command-medium-beta
485
+ display_name: Command beta (6.1B)
486
+ description: Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
423
487
  creator_organization_name: Cohere
424
488
  access: limited
425
489
  num_parameters: 6100000000
426
490
  release_date: 2022-11-08
427
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
491
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
428
492
 
429
- - name: cohere/command-xlarge-beta # DEPRECATED
430
- display_name: Cohere Command beta (52.4B)
431
- description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
493
+ - name: cohere/command-xlarge-beta
494
+ display_name: Command beta (52.4B)
495
+ description: Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
432
496
  creator_organization_name: Cohere
433
497
  access: limited
434
498
  num_parameters: 52400000000
435
499
  release_date: 2022-11-08
436
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
500
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
437
501
 
438
502
  - name: cohere/command
439
- display_name: Cohere Command
503
+ display_name: Command
440
504
  description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
441
505
  creator_organization_name: Cohere
442
506
  access: limited
@@ -444,12 +508,30 @@ models:
444
508
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
445
509
 
446
510
  - name: cohere/command-light
447
- display_name: Cohere Command Light
511
+ display_name: Command Light
448
512
  description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
449
513
  creator_organization_name: Cohere
450
514
  access: limited
451
515
  release_date: 2023-09-29
452
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
516
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
517
+
518
+ - name: cohere/command-r
519
+ display_name: Command R
520
+ description: Command R is a multilingual 35B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
521
+ creator_organization_name: Cohere
522
+ access: open
523
+ num_parameters: 35000000000
524
+ release_date: 2024-03-11
525
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
526
+
527
+ - name: cohere/command-r-plus
528
+ display_name: Command R Plus
529
+ description: Command R+ is a multilingual 104B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
530
+ creator_organization_name: Cohere
531
+ access: open
532
+ num_parameters: 104000000000
533
+ release_date: 2024-04-04
534
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
453
535
 
454
536
  # Craiyon
455
537
  - name: craiyon/dalle-mini
@@ -624,7 +706,16 @@ models:
624
706
  release_date: 2023-02-13
625
707
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
626
708
 
709
+ # EPFL LLM
627
710
 
711
+ - name: epfl-llm/meditron-7b
712
+ display_name: Meditron (7B)
713
+ description: Meditron-7B is a 7 billion parameter model adapted to the medical domain from Llama-2-7B through continued pretraining on a comprehensively curated medical corpus.
714
+ creator_organization_name: EPFL LLM
715
+ access: open
716
+ num_parameters: 7000000000
717
+ release_date: 2023-11-27
718
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
628
719
 
629
720
  # Google
630
721
  - name: google/t5-11b
@@ -673,13 +764,21 @@ models:
673
764
  tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
674
765
 
675
766
  - name: google/gemini-1.0-pro-001
676
- display_name: Gemini 1.0 Pro
767
+ display_name: Gemini 1.0 Pro (001)
677
768
  description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
678
769
  creator_organization_name: Google
679
770
  access: limited
680
771
  release_date: 2023-12-13
681
772
  tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
682
773
 
774
+ - name: google/gemini-1.0-pro-002
775
+ display_name: Gemini 1.0 Pro (002)
776
+ description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
777
+ creator_organization_name: Google
778
+ access: limited
779
+ release_date: 2024-04-09
780
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
781
+
683
782
  # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
684
783
  - name: google/gemini-pro-vision
685
784
  display_name: Gemini Pro Vision
@@ -697,18 +796,81 @@ models:
697
796
  release_date: 2023-12-13
698
797
  tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
699
798
 
799
+ - name: google/gemini-1.5-pro-001
800
+ display_name: Gemini 1.5 Pro (001)
801
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
802
+ creator_organization_name: Google
803
+ access: limited
804
+ release_date: 2024-05-24
805
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
806
+
807
+ - name: google/gemini-1.5-flash-001
808
+ display_name: Gemini 1.5 Flash (001)
809
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
810
+ creator_organization_name: Google
811
+ access: limited
812
+ release_date: 2024-05-24
813
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
814
+
700
815
  - name: google/gemini-1.5-pro-preview-0409
701
816
  display_name: Gemini 1.5 Pro (0409 preview)
702
- description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
817
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
703
818
  creator_organization_name: Google
704
819
  access: limited
705
820
  release_date: 2024-04-10
706
821
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
707
822
 
823
+ - name: google/gemini-1.5-pro-preview-0514
824
+ display_name: Gemini 1.5 Pro (0514 preview)
825
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
826
+ creator_organization_name: Google
827
+ access: limited
828
+ release_date: 2024-05-14
829
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
830
+
831
+ - name: google/gemini-1.5-flash-preview-0514
832
+ display_name: Gemini 1.5 Flash (0514 preview)
833
+ description: Gemini 1.5 Flash is a smaller Gemini model. It has a 1 million token context window and allows interleaving text, images, audio and video as inputs. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([blog](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/))
834
+ creator_organization_name: Google
835
+ access: limited
836
+ release_date: 2024-05-14
837
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
838
+
839
+ - name: google/gemini-1.5-pro-001-safety-default
840
+ display_name: Gemini 1.5 Pro (001, default safety)
841
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
842
+ creator_organization_name: Google
843
+ access: limited
844
+ release_date: 2024-05-24
845
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
846
+
847
+ - name: google/gemini-1.5-pro-001-safety-block-none
848
+ display_name: Gemini 1.5 Pro (001, BLOCK_NONE safety)
849
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
850
+ creator_organization_name: Google
851
+ access: limited
852
+ release_date: 2024-05-24
853
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
854
+
855
+ - name: google/gemini-1.5-flash-001-safety-default
856
+ display_name: Gemini 1.5 Flash (001, default safety)
857
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
858
+ creator_organization_name: Google
859
+ access: limited
860
+ release_date: 2024-05-24
861
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
862
+
863
+ - name: google/gemini-1.5-flash-001-safety-block-none
864
+ display_name: Gemini 1.5 Flash (001, BLOCK_NONE safety)
865
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
866
+ creator_organization_name: Google
867
+ access: limited
868
+ release_date: 2024-05-24
869
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
870
+
708
871
  - name: google/gemma-2b
709
872
  display_name: Gemma (2B)
710
- # TODO: Fill in Gemma description.
711
- description: TBD
873
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
712
874
  creator_organization_name: Google
713
875
  access: open
714
876
  release_date: 2024-02-21
@@ -716,8 +878,7 @@ models:
716
878
 
717
879
  - name: google/gemma-2b-it
718
880
  display_name: Gemma Instruct (2B)
719
- # TODO: Fill in Gemma description.
720
- description: TBD
881
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
721
882
  creator_organization_name: Google
722
883
  access: open
723
884
  release_date: 2024-02-21
@@ -725,8 +886,7 @@ models:
725
886
 
726
887
  - name: google/gemma-7b
727
888
  display_name: Gemma (7B)
728
- # TODO: Fill in Gemma description.
729
- description: TBD
889
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
730
890
  creator_organization_name: Google
731
891
  access: open
732
892
  release_date: 2024-02-21
@@ -734,14 +894,60 @@ models:
734
894
 
735
895
  - name: google/gemma-7b-it
736
896
  display_name: Gemma Instruct (7B)
737
- # TODO: Fill in Gemma description.
738
- description: TBD
897
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
739
898
  creator_organization_name: Google
740
899
  access: open
741
900
  release_date: 2024-02-21
742
- # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
743
901
  tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
744
902
 
903
+ - name: google/gemma-2-9b
904
+ display_name: Gemma 2 (9B)
905
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
906
+ creator_organization_name: Google
907
+ access: open
908
+ release_date: 2024-06-27
909
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
910
+
911
+ - name: google/gemma-2-9b-it
912
+ display_name: Gemma 2 Instruct (9B)
913
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
914
+ creator_organization_name: Google
915
+ access: open
916
+ release_date: 2024-06-27
917
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
918
+
919
+ - name: google/gemma-2-27b
920
+ display_name: Gemma 2 (27B)
921
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
922
+ creator_organization_name: Google
923
+ access: open
924
+ release_date: 2024-06-27
925
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
926
+
927
+ - name: google/gemma-2-27b-it
928
+ display_name: Gemma 2 Instruct (27B)
929
+ description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/google-gemma-2/))
930
+ creator_organization_name: Google
931
+ access: open
932
+ release_date: 2024-06-27
933
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
934
+
935
+ - name: google/paligemma-3b-mix-224
936
+ display_name: PaliGemma (3B) Mix 224
937
+ description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 224x224 input images and 128 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
938
+ creator_organization_name: Google
939
+ access: open
940
+ release_date: 2024-05-12
941
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
942
+
943
+ - name: google/paligemma-3b-mix-448
944
+ display_name: PaliGemma (3B) Mix 448
945
+ description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 448x448 input images and 512 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
946
+ creator_organization_name: Google
947
+ access: open
948
+ release_date: 2024-05-12
949
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
950
+
745
951
  - name: google/text-bison@001
746
952
  display_name: PaLM-2 (Bison)
747
953
  description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -798,7 +1004,21 @@ models:
798
1004
  release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
799
1005
  tags: [CODE_MODEL_TAG]
800
1006
 
1007
+ - name: google/medlm-medium
1008
+ display_name: MedLM (Medium)
1009
+ description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
1010
+ creator_organization_name: Google
1011
+ access: limited
1012
+ release_date: 2023-12-13
1013
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
801
1014
 
1015
+ - name: google/medlm-large
1016
+ display_name: MedLM (Large)
1017
+ description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
1018
+ creator_organization_name: Google
1019
+ access: limited
1020
+ release_date: 2023-12-13
1021
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
802
1022
 
803
1023
  # HuggingFace
804
1024
  - name: HuggingFaceM4/idefics2-8b
@@ -1059,8 +1279,6 @@ models:
1059
1279
  release_date: 2023-06-22
1060
1280
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1061
1281
 
1062
-
1063
-
1064
1282
  # Meta
1065
1283
  - name: meta/opt-iml-175b # NOT SUPPORTED
1066
1284
  display_name: OPT-IML (175B)
@@ -1203,7 +1421,7 @@ models:
1203
1421
 
1204
1422
  - name: meta/llama-3-8b
1205
1423
  display_name: Llama 3 (8B)
1206
- description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
1424
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
1207
1425
  creator_organization_name: Meta
1208
1426
  access: open
1209
1427
  num_parameters: 8000000000
@@ -1212,16 +1430,43 @@ models:
1212
1430
 
1213
1431
  - name: meta/llama-3-70b
1214
1432
  display_name: Llama 3 (70B)
1215
- description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability.
1433
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
1216
1434
  creator_organization_name: Meta
1217
1435
  access: open
1218
1436
  num_parameters: 70000000000
1219
1437
  release_date: 2024-04-18
1220
1438
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1221
1439
 
1440
+ - name: meta/llama-3.1-8b-instruct-turbo
1441
+ display_name: Llama 3.1 Instruct Turbo (8B)
1442
+ description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1443
+ creator_organization_name: Meta
1444
+ access: open
1445
+ num_parameters: 8000000000
1446
+ release_date: 2024-07-23
1447
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1448
+
1449
+ - name: meta/llama-3.1-70b-instruct-turbo
1450
+ display_name: Llama 3.1 Instruct Turbo (70B)
1451
+ description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1452
+ creator_organization_name: Meta
1453
+ access: open
1454
+ num_parameters: 70000000000
1455
+ release_date: 2024-07-23
1456
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1457
+
1458
+ - name: meta/llama-3.1-405b-instruct-turbo
1459
+ display_name: Llama 3.1 Instruct Turbo (405B)
1460
+ description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1461
+ creator_organization_name: Meta
1462
+ access: open
1463
+ num_parameters: 405000000000
1464
+ release_date: 2024-07-23
1465
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1466
+
1222
1467
  - name: meta/llama-3-8b-chat
1223
- display_name: Llama 3 Chat (8B)
1224
- description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
1468
+ display_name: Llama 3 Instruct (8B)
1469
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
1225
1470
  creator_organization_name: Meta
1226
1471
  access: open
1227
1472
  num_parameters: 8000000000
@@ -1229,14 +1474,44 @@ models:
1229
1474
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1230
1475
 
1231
1476
  - name: meta/llama-3-70b-chat
1232
- display_name: Llama 3 Chat (70B)
1233
- description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
1477
+ display_name: Llama 3 Instruct (70B)
1478
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)
1234
1479
  creator_organization_name: Meta
1235
1480
  access: open
1236
1481
  num_parameters: 70000000000
1237
1482
  release_date: 2024-04-18
1238
1483
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1239
1484
 
1485
+ - name: meta/llama-guard-7b
1486
+ display_name: Llama Guard (7B)
1487
+ description: Llama-Guard is a 7B parameter Llama 2-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories.
1488
+ creator_organization_name: Meta
1489
+ access: open
1490
+ num_parameters: 7000000000
1491
+ release_date: 2023-12-07
1492
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1493
+
1494
+ - name: meta/llama-guard-2-8b
1495
+ display_name: Llama Guard 2 (8B)
1496
+ description: Llama Guard 2 is an 8B parameter Llama 3-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
1497
+ creator_organization_name: Meta
1498
+ access: open
1499
+ num_parameters: 8000000000
1500
+ release_date: 2024-04-18
1501
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1502
+
1503
+ - name: meta/llama-guard-3-8b
1504
+ display_name: Llama Guard 3 (8B)
1505
+ description: Llama Guard 3 is an 8B parameter Llama 3.1-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
1506
+ creator_organization_name: Meta
1507
+ access: open
1508
+ num_parameters: 8000000000
1509
+ release_date: 2024-07-23
1510
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1511
+
1512
+
1513
+
1514
+
1240
1515
 
1241
1516
  # Microsoft/NVIDIA
1242
1517
  - name: microsoft/TNLGv2_530B
@@ -1246,7 +1521,7 @@ models:
1246
1521
  access: closed
1247
1522
  num_parameters: 530000000000
1248
1523
  release_date: 2022-01-28
1249
- tags: [] # deprecated text model
1524
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1250
1525
 
1251
1526
  - name: microsoft/TNLGv2_7B
1252
1527
  display_name: TNLG v2 (6.7B)
@@ -1255,7 +1530,7 @@ models:
1255
1530
  access: closed
1256
1531
  num_parameters: 6700000000
1257
1532
  release_date: 2022-01-28
1258
- tags: [] # deprecated text model
1533
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1259
1534
 
1260
1535
  - name: microsoft/llava-1.5-7b-hf
1261
1536
  display_name: LLaVA 1.5 (7B)
@@ -1329,7 +1604,33 @@ models:
1329
1604
  release_date: 2023-10-05
1330
1605
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1331
1606
 
1607
+ - name: microsoft/phi-3-small-8k-instruct
1608
+ display_name: Phi-3 (7B)
1609
+ description: Phi-3-Small-8K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning-dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
1610
+ creator_organization_name: Microsoft
1611
+ access: open
1612
+ num_parameters: 7000000000
1613
+ release_date: 2024-05-21
1614
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1615
+
1616
+ - name: microsoft/phi-3-medium-4k-instruct
1617
+ display_name: Phi-3 (14B)
1618
+ description: Phi-3-Medium-4K-Instruct is a lightweight model trained with synthetic data and filtered publicly available website data with a focus on high-quality and reasoning-dense properties. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://azure.microsoft.com/en-us/blog/new-models-added-to-the-phi-3-family-available-on-microsoft-azure/))
1619
+ creator_organization_name: Microsoft
1620
+ access: open
1621
+ num_parameters: 14000000000
1622
+ release_date: 2024-05-21
1623
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1332
1624
 
1625
+ # KAIST AI
1626
+ - name: kaistai/prometheus-vision-13b-v1.0-hf
1627
+ display_name: LLaVA + Vicuna-v1.5 (13B)
1628
+ description: LLaVA is an open-source chatbot trained by fine-tuning LLaMA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
1629
+ creator_organization_name: KAIST AI
1630
+ access: open
1631
+ num_parameters: 13000000000
1632
+ release_date: 2024-01-01
1633
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1333
1634
 
1334
1635
  # 01.AI
1335
1636
  - name: 01-ai/yi-6b
@@ -1340,6 +1641,7 @@ models:
1340
1641
  num_parameters: 6000000000
1341
1642
  release_date: 2023-11-02
1342
1643
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1644
+
1343
1645
  - name: 01-ai/yi-34b
1344
1646
  display_name: Yi (34B)
1345
1647
  description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1348,6 +1650,7 @@ models:
1348
1650
  num_parameters: 34000000000
1349
1651
  release_date: 2023-11-02
1350
1652
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1653
+
1351
1654
  - name: 01-ai/yi-6b-chat
1352
1655
  display_name: Yi Chat (6B)
1353
1656
  description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1356,6 +1659,7 @@ models:
1356
1659
  num_parameters: 6000000000
1357
1660
  release_date: 2023-11-23
1358
1661
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1662
+
1359
1663
  - name: 01-ai/yi-34b-chat
1360
1664
  display_name: Yi Chat (34B)
1361
1665
  description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1365,6 +1669,22 @@ models:
1365
1669
  release_date: 2023-11-23
1366
1670
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1367
1671
 
1672
+ - name: 01-ai/yi-large
1673
+ display_name: Yi Large
1674
+ description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
1675
+ creator_organization_name: 01.AI
1676
+ access: limited
1677
+ release_date: 2024-05-12
1678
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1679
+
1680
+ - name: 01-ai/yi-large-preview
1681
+ display_name: Yi Large (Preview)
1682
+ description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
1683
+ creator_organization_name: 01.AI
1684
+ access: limited
1685
+ release_date: 2024-05-12
1686
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1687
+
1368
1688
  # Allen Institute for AI
1369
1689
  # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
1370
1690
  - name: allenai/olmo-7b
@@ -1395,35 +1715,70 @@ models:
1395
1715
  # TODO: Add instruct tag.
1396
1716
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1397
1717
 
1718
+ - name: allenai/olmo-1.7-7b
1719
+ display_name: OLMo 1.7 (7B)
1720
+ description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct versions were trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
1721
+ creator_organization_name: Allen Institute for AI
1722
+ access: open
1723
+ num_parameters: 7000000000
1724
+ release_date: 2024-04-17
1725
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1398
1726
 
1399
1727
  # Mistral AI
1400
1728
  - name: mistralai/mistral-7b-v0.1
1401
1729
  display_name: Mistral v0.1 (7B)
1402
- description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
1730
+ description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
1403
1731
  creator_organization_name: Mistral AI
1404
1732
  access: open
1405
1733
  num_parameters: 7300000000
1406
1734
  release_date: 2023-09-27
1735
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1736
+
1737
+ - name: mistralai/mistral-7b-instruct-v0.1
1738
+ display_name: Mistral Instruct v0.1 (7B)
1739
+ description: Mistral v0.1 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). The instruct version was fine-tuned using publicly available conversation datasets. ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
1740
+ creator_organization_name: Mistral AI
1741
+ access: open
1742
+ num_parameters: 7300000000
1743
+ release_date: 2023-09-27
1744
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1745
+
1746
+ - name: mistralai/mistral-7b-instruct-v0.2
1747
+ display_name: Mistral Instruct v0.2 (7B)
1748
+ description: Mistral v0.2 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
1749
+ creator_organization_name: Mistral AI
1750
+ access: open
1751
+ num_parameters: 7300000000
1752
+ release_date: 2024-03-23
1753
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1754
+
1755
+ - name: mistralai/mistral-7b-instruct-v0.3
1756
+ display_name: Mistral Instruct v0.3 (7B)
1757
+ description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.3 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
1758
+ creator_organization_name: Mistral AI
1759
+ access: open
1760
+ num_parameters: 7300000000
1761
+ release_date: 2024-05-22
1407
1762
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1408
1763
 
1409
1764
  - name: mistralai/mixtral-8x7b-32kseqlen
1410
1765
  display_name: Mixtral (8x7B 32K seqlen)
1411
- description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
1766
+ description: Mixtral is a mixture-of-experts model that has 46.7B total parameters but only uses 12.9B parameters per token. ([blog post](https://mistral.ai/news/mixtral-of-experts/), [tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
1412
1767
  creator_organization_name: Mistral AI
1413
1768
  access: open
1414
1769
  num_parameters: 46700000000
1415
1770
  release_date: 2023-12-08
1416
- tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1771
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
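The 46.7B total versus 12.9B active parameter figures above are consistent with top-2 routing over 8 experts. A rough back-of-the-envelope decomposition under that assumption (an estimate, not numbers taken from this file):

```python
# Rough sketch: split Mixtral's parameter count into shared and per-expert parts,
# assuming 8 experts, 2 active per token (top-2 routing), and that everything
# outside the expert MLPs is shared. These are estimates, not official numbers.
total_params = 46.7e9    # all 8 experts plus shared parameters
active_params = 12.9e9   # 2 experts plus shared parameters
num_experts, active_experts = 8, 2

# total  = shared + 8 * expert
# active = shared + 2 * expert  =>  expert = (total - active) / 6
expert_params = (total_params - active_params) / (num_experts - active_experts)
shared_params = total_params - num_experts * expert_params

print(f"per-expert parameters ~ {expert_params / 1e9:.1f}B")  # ~ 5.6B
print(f"shared parameters     ~ {shared_params / 1e9:.1f}B")  # ~ 1.6B
```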
1417
1772
 
1418
1773
  - name: mistralai/mixtral-8x7b-instruct-v0.1
1419
- display_name: Mixtral (8x7B Instruct)
1420
- description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following.
1774
+ display_name: Mixtral Instruct (8x7B)
1775
+ description: Mixtral Instruct (8x7B) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following. ([blog post](https://mistral.ai/news/mixtral-of-experts/)).
1421
1776
  creator_organization_name: Mistral AI
1422
1777
  access: open
1423
1778
  num_parameters: 46700000000
1424
1779
  # Blog post: https://mistral.ai/news/mixtral-of-experts/
1425
1780
  release_date: 2023-12-11
1426
- tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1781
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1427
1782
 
1428
1783
  - name: mistralai/mixtral-8x22b
1429
1784
  display_name: Mixtral (8x22B)
@@ -1432,7 +1787,7 @@ models:
1432
1787
  access: open
1433
1788
  num_parameters: 176000000000
1434
1789
  release_date: 2024-04-10
1435
- tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1790
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1436
1791
 
1437
1792
  - name: mistralai/mixtral-8x22b-instruct-v0.1
1438
1793
  display_name: Mixtral Instruct (8x22B)
@@ -1454,13 +1809,11 @@ models:
1454
1809
 
1455
1810
  - name: mistralai/mistral-small-2402
1456
1811
  display_name: Mistral Small (2402)
1457
- # TODO: Fill in description
1458
- description: TBD
1812
+ description: Mistral Small is a multilingual model with a 32K-token context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
1459
1813
  creator_organization_name: Mistral AI
1460
1814
  access: limited
1461
- # Blog post: https://mistral.ai/news/mistral-large/
1462
1815
  release_date: 2023-02-26
1463
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1816
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1464
1817
 
1465
1818
  - name: mistralai/mistral-medium-2312
1466
1819
  display_name: Mistral Medium (2312)
@@ -1468,18 +1821,32 @@ models:
1468
1821
  creator_organization_name: Mistral AI
1469
1822
  access: limited
1470
1823
  release_date: 2023-12-11
1471
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1824
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1472
1825
 
1473
1826
  - name: mistralai/mistral-large-2402
1474
1827
  display_name: Mistral Large (2402)
1475
- # TODO: Fill in description
1476
- description: TBD
1828
+ description: Mistral Large is a multilingual model with a 32K-token context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
1477
1829
  creator_organization_name: Mistral AI
1478
1830
  access: limited
1479
- # Blog post: https://mistral.ai/news/mistral-large/
1480
1831
  release_date: 2023-02-26
1481
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG, MISTRAL_MODEL_TAG]
1832
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1482
1833
 
1834
+ - name: mistralai/mistral-large-2407
1835
+ display_name: Mistral Large 2 (2407)
1836
+ description: Mistral Large 2 is a 123 billion parameter model that has a 128k context window and supports dozens of languages and 80+ coding languages. ([blog](https://mistral.ai/news/mistral-large-2407/))
1837
+ creator_organization_name: Mistral AI
1838
+ access: open
1839
+ num_parameters: 123000000000
1840
+ release_date: 2024-07-24
1841
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1842
+
1843
+ - name: mistralai/open-mistral-nemo-2407
1844
+ display_name: Mistral NeMo (2407)
1845
+ description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
1846
+ creator_organization_name: Mistral AI
1847
+ access: open
1848
+ release_date: 2024-07-18
1849
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1483
1850
 
1484
1851
  # MosaicML
1485
1852
  - name: mosaicml/mpt-7b
@@ -1558,7 +1925,13 @@ models:
1558
1925
  release_date: 2019-09-17 # paper date
1559
1926
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, BUGGY_TEMP_0_TAG]
1560
1927
 
1561
-
1928
+ - name: nvidia/nemotron-4-340b-instruct
1929
+ display_name: Nemotron-4 Instruct (340B)
1930
+ description: Nemotron-4 Instruct (340B) is an open weights model sized to fit on a single DGX H100 with 8 GPUs when deployed in FP8 precision. 98% of the data used for model alignment was synthetically generated ([paper](https://arxiv.org/abs/2406.11704)).
1931
+ creator_organization_name: NVIDIA
1932
+ access: open
1933
+ release_date: 2024-06-17
1934
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1562
1935
 
1563
1936
  # OpenAI
1564
1937
 
@@ -1596,97 +1969,95 @@ models:
1596
1969
 
1597
1970
  # DEPRECATED: Announced on July 06 2023 that these models will be shut down on January 04 2024.
1598
1971
 
1599
- - name: openai/davinci # DEPRECATED
1972
+ - name: openai/davinci
1600
1973
  display_name: davinci (175B)
1601
1974
  description: Original GPT-3 (175B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
1602
1975
  creator_organization_name: OpenAI
1603
1976
  access: limited
1604
1977
  num_parameters: 175000000000
1605
1978
  release_date: 2020-05-28
1606
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1979
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1607
1980
 
1608
- - name: openai/curie # DEPRECATED
1981
+ - name: openai/curie
1609
1982
  display_name: curie (6.7B)
1610
1983
  description: Original GPT-3 (6.7B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
1611
1984
  creator_organization_name: OpenAI
1612
1985
  access: limited
1613
1986
  num_parameters: 6700000000
1614
1987
  release_date: 2020-05-28
1615
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1988
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1616
1989
 
1617
- - name: openai/babbage # DEPRECATED
1990
+ - name: openai/babbage
1618
1991
  display_name: babbage (1.3B)
1619
1992
  description: Original GPT-3 (1.3B parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
1620
1993
  creator_organization_name: OpenAI
1621
1994
  access: limited
1622
1995
  num_parameters: 1300000000
1623
1996
  release_date: 2020-05-28
1624
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1997
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1625
1998
 
1626
- - name: openai/ada # DEPRECATED
1999
+ - name: openai/ada
1627
2000
  display_name: ada (350M)
1628
2001
  description: Original GPT-3 (350M parameters) autoregressive language model ([paper](https://arxiv.org/pdf/2005.14165.pdf), [docs](https://beta.openai.com/docs/model-index-for-researchers)).
1629
2002
  creator_organization_name: OpenAI
1630
2003
  access: limited
1631
2004
  num_parameters: 350000000
1632
2005
  release_date: 2020-05-28
1633
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2006
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1634
2007
 
1635
- - name: openai/text-davinci-003 # DEPRECATED
2008
+ - name: openai/text-davinci-003
1636
2009
  display_name: GPT-3.5 (text-davinci-003)
1637
2010
  description: text-davinci-003 model that involves reinforcement learning (PPO) with reward models. Derived from text-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
1638
2011
  creator_organization_name: OpenAI
1639
2012
  access: limited
1640
2013
  num_parameters: 175000000000
1641
2014
  release_date: 2022-11-28
1642
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2015
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1643
2016
 
1644
- # TODO: text-davinci-002 supports insertion. Support insertion in our framework.
1645
- # https://github.com/stanford-crfm/benchmarking/issues/359
1646
- - name: openai/text-davinci-002 # DEPRECATED
2017
+ - name: openai/text-davinci-002
1647
2018
  display_name: GPT-3.5 (text-davinci-002)
1648
2019
  description: text-davinci-002 model that involves supervised fine-tuning on human-written demonstrations. Derived from code-davinci-002 ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
1649
2020
  creator_organization_name: OpenAI
1650
2021
  access: limited
1651
2022
  num_parameters: 175000000000
1652
2023
  release_date: 2022-01-27
1653
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2024
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1654
2025
 
1655
- - name: openai/text-davinci-001 # DEPRECATED
2026
+ - name: openai/text-davinci-001
1656
2027
  display_name: GPT-3.5 (text-davinci-001)
1657
2028
  description: text-davinci-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
1658
2029
  creator_organization_name: OpenAI
1659
2030
  access: limited
1660
2031
  num_parameters: 175000000000
1661
2032
  release_date: 2022-01-27
1662
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2033
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1663
2034
 
1664
- - name: openai/text-curie-001 # DEPRECATED
2035
+ - name: openai/text-curie-001
1665
2036
  display_name: text-curie-001
1666
2037
  description: text-curie-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
1667
2038
  creator_organization_name: OpenAI
1668
2039
  access: limited
1669
2040
  num_parameters: 6700000000
1670
2041
  release_date: 2022-01-27
1671
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2042
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1672
2043
 
1673
- - name: openai/text-babbage-001 # DEPRECATED
2044
+ - name: openai/text-babbage-001
1674
2045
  display_name: text-babbage-001
1675
2046
  description: text-babbage-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
1676
2047
  creator_organization_name: OpenAI
1677
2048
  access: limited
1678
2049
  num_parameters: 1300000000
1679
2050
  release_date: 2022-01-27
1680
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2051
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1681
2052
 
1682
- - name: openai/text-ada-001 # DEPRECATED
2053
+ - name: openai/text-ada-001
1683
2054
  display_name: text-ada-001
1684
2055
  description: text-ada-001 model that involves supervised fine-tuning on human-written demonstrations ([docs](https://beta.openai.com/docs/model-index-for-researchers)).
1685
2056
  creator_organization_name: OpenAI
1686
2057
  access: limited
1687
2058
  num_parameters: 350000000
1688
2059
  release_date: 2022-01-27
1689
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2060
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1690
2061
 
1691
2062
 
1692
2063
  ## GPT 3.5 Turbo Models
@@ -1727,7 +2098,7 @@ models:
1727
2098
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1728
2099
 
1729
2100
  - name: openai/gpt-3.5-turbo-0125
1730
- display_name: gpt-3.5-turbo-0125
2101
+ display_name: GPT-3.5 Turbo (0125)
1731
2102
  description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
1732
2103
  creator_organization_name: OpenAI
1733
2104
  access: limited
@@ -1804,7 +2175,31 @@ models:
1804
2175
  creator_organization_name: OpenAI
1805
2176
  access: limited
1806
2177
  release_date: 2024-04-09
1807
- tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2178
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2179
+
2180
+ - name: openai/gpt-4o-2024-05-13
2181
+ display_name: GPT-4o (2024-05-13)
2182
+ description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/hello-gpt-4o/))
2183
+ creator_organization_name: OpenAI
2184
+ access: limited
2185
+ release_date: 2024-05-13
2186
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2187
+
2188
+ - name: openai/gpt-4o-2024-08-06
2189
+ display_name: GPT-4o (2024-08-06)
2190
+ description: GPT-4o (2024-08-06) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
2191
+ creator_organization_name: OpenAI
2192
+ access: limited
2193
+ release_date: 2024-08-06
2194
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2195
+
2196
+ - name: openai/gpt-4o-mini-2024-07-18
2197
+ display_name: GPT-4o mini (2024-07-18)
2198
+ description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
2199
+ creator_organization_name: OpenAI
2200
+ access: limited
2201
+ release_date: 2024-07-18
2202
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1808
2203
 
1809
2204
  - name: openai/gpt-4-vision-preview
1810
2205
  # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
@@ -1826,30 +2221,30 @@ models:
1826
2221
  ## Codex Models
1827
2222
  # DEPRECATED: Codex models have been shut down on March 23 2023.
1828
2223
 
1829
- - name: openai/code-davinci-002 # DEPRECATED
2224
+ - name: openai/code-davinci-002
1830
2225
  display_name: code-davinci-002
1831
2226
  description: Codex-style model that is designed for pure code-completion tasks ([docs](https://beta.openai.com/docs/models/codex)).
1832
2227
  creator_organization_name: OpenAI
1833
2228
  access: limited
1834
2229
  release_date: 2021-07-01 # TODO: Find correct date (this is for v1)
1835
- tags: [CODE_MODEL_TAG]
2230
+ tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]
1836
2231
 
1837
- - name: openai/code-davinci-001 # DEPRECATED
2232
+ - name: openai/code-davinci-001
1838
2233
  display_name: code-davinci-001
1839
2234
  description: code-davinci-001 model
1840
2235
  creator_organization_name: OpenAI
1841
2236
  access: limited
1842
2237
  release_date: 2021-07-01 # Paper date
1843
- tags: [CODE_MODEL_TAG]
2238
+ tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]
1844
2239
 
1845
- - name: openai/code-cushman-001 # DEPRECATED
2240
+ - name: openai/code-cushman-001
1846
2241
  display_name: code-cushman-001 (12B)
1847
2242
  description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
1848
2243
  creator_organization_name: OpenAI
1849
2244
  access: limited
1850
2245
  num_parameters: 12000000000
1851
2246
  release_date: 2021-07-01 # Paper date
1852
- tags: [CODE_MODEL_TAG]
2247
+ tags: [DEPRECATED_MODEL_TAG, CODE_MODEL_TAG]
1853
2248
 
1854
2249
 
1855
2250
  ## Text Similarity Models
@@ -1859,41 +2254,41 @@ models:
1859
2254
  # DEPRECATED: Announced on July 06 2023 that first generation embeddings models
1860
2255
  # will be shut down on January 04 2024.
1861
2256
 
1862
- - name: openai/text-similarity-davinci-001 # DEPRECATED
2257
+ - name: openai/text-similarity-davinci-001
1863
2258
  display_name: text-similarity-davinci-001
1864
2259
  description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
1865
2260
  creator_organization_name: OpenAI
1866
2261
  access: limited
1867
2262
  num_parameters: 175000000000
1868
2263
  release_date: 2022-01-25 # Blog post date
1869
- tags: [TEXT_SIMILARITY_MODEL_TAG]
2264
+ tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
1870
2265
 
1871
- - name: openai/text-similarity-curie-001 # DEPRECATED
2266
+ - name: openai/text-similarity-curie-001
1872
2267
  display_name: text-similarity-curie-001
1873
2268
  description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
1874
2269
  creator_organization_name: OpenAI
1875
2270
  access: limited
1876
2271
  num_parameters: 6700000000
1877
2272
  release_date: 2022-01-25 # Blog post date
1878
- tags: [TEXT_SIMILARITY_MODEL_TAG]
2273
+ tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
1879
2274
 
1880
- - name: openai/text-similarity-babbage-001 # DEPRECATED
2275
+ - name: openai/text-similarity-babbage-001
1881
2276
  display_name: text-similarity-babbage-001
1882
2277
  description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
1883
2278
  creator_organization_name: OpenAI
1884
2279
  access: limited
1885
2280
  num_parameters: 1300000000
1886
2281
  release_date: 2022-01-25 # Blog post date
1887
- tags: [TEXT_SIMILARITY_MODEL_TAG]
2282
+ tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
1888
2283
 
1889
- - name: openai/text-similarity-ada-001 # DEPRECATED
2284
+ - name: openai/text-similarity-ada-001
1890
2285
  display_name: text-similarity-ada-001
1891
2286
  description: Embedding model that is designed for text similarity tasks ([docs](https://openai.com/blog/introducing-text-and-code-embeddings)).
1892
2287
  creator_organization_name: OpenAI
1893
2288
  access: limited
1894
2289
  num_parameters: 350000000
1895
2290
  release_date: 2022-01-25 # Blog post date
1896
- tags: [TEXT_SIMILARITY_MODEL_TAG]
2291
+ tags: [DEPRECATED_MODEL_TAG, TEXT_SIMILARITY_MODEL_TAG]
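The text-similarity entries above are embedding models rather than text generators; similarity scores come from comparing output vectors. A minimal, self-contained sketch of the usual cosine-similarity comparison, using made-up vectors rather than real API output:

```python
# Minimal sketch: cosine similarity between two embedding vectors.
# The vectors are made up for illustration; real embeddings would come from
# a text-similarity model.
import numpy as np


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


emb_a = np.array([0.12, -0.40, 0.88, 0.05])
emb_b = np.array([0.10, -0.35, 0.90, 0.00])
print(f"cosine similarity: {cosine_similarity(emb_a, emb_b):.3f}")
```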
1897
2292
 
1898
2293
  - name: openai/text-embedding-ada-002
1899
2294
  display_name: text-embedding-ada-002
@@ -1949,11 +2344,39 @@ models:
1949
2344
  release_date: 2023-11-06
1950
2345
  tags: [TEXT_TO_IMAGE_MODEL_TAG]
1951
2346
 
2347
+ # OpenThaiGPT
2348
+ - name: openthaigpt/openthaigpt-1.0.0-7b-chat
2349
+ display_name: OpenThaiGPT v1.0.0 (7B)
2350
+ description: OpenThaiGPT v1.0.0 (7B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
2351
+ creator_organization_name: OpenThaiGPT
2352
+ access: open
2353
+ num_parameters: 7000000000
2354
+ release_date: 2024-04-08
2355
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2356
+
2357
+ - name: openthaigpt/openthaigpt-1.0.0-13b-chat
2358
+ display_name: OpenThaiGPT v1.0.0 (13B)
2359
+ description: OpenThaiGPT v1.0.0 (13B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
2360
+ creator_organization_name: OpenThaiGPT
2361
+ access: open
2362
+ num_parameters: 13000000000
2363
+ release_date: 2024-04-08
2364
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2365
+
2366
+ - name: openthaigpt/openthaigpt-1.0.0-70b-chat
2367
+ display_name: OpenThaiGPT v1.0.0 (70B)
2368
+ description: OpenThaiGPT v1.0.0 (70B) is a Thai language chat model based on Llama 2 that has been specifically fine-tuned for Thai instructions and enhanced by incorporating over 10,000 of the most commonly used Thai words into the dictionary. ([blog post](https://openthaigpt.aieat.or.th/openthaigpt-1.0.0-less-than-8-apr-2024-greater-than))
2369
+ creator_organization_name: OpenThaiGPT
2370
+ access: open
2371
+ num_parameters: 70000000000
2372
+ release_date: 2024-04-08
2373
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2374
+
1952
2375
  # Qwen
1953
2376
 
1954
2377
  - name: qwen/qwen-7b
1955
2378
  display_name: Qwen
1956
- description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2379
+ description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
1957
2380
  creator_organization_name: Qwen
1958
2381
  access: open
1959
2382
  release_date: 2024-02-05
@@ -1961,7 +2384,7 @@ models:
1961
2384
 
1962
2385
  - name: qwen/qwen1.5-7b
1963
2386
  display_name: Qwen1.5 (7B)
1964
- description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2387
+ description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
1965
2388
  creator_organization_name: Qwen
1966
2389
  access: open
1967
2390
  release_date: 2024-02-05
@@ -1969,7 +2392,7 @@ models:
1969
2392
 
1970
2393
  - name: qwen/qwen1.5-14b
1971
2394
  display_name: Qwen1.5 (14B)
1972
- description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2395
+ description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
1973
2396
  creator_organization_name: Qwen
1974
2397
  access: open
1975
2398
  release_date: 2024-02-05
@@ -1977,20 +2400,68 @@ models:
1977
2400
 
1978
2401
  - name: qwen/qwen1.5-32b
1979
2402
  display_name: Qwen1.5 (32B)
1980
- description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2403
+ description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
1981
2404
  creator_organization_name: Qwen
1982
2405
  access: open
1983
- release_date: 2024-02-05
2406
+ release_date: 2024-04-02
1984
2407
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1985
2408
 
1986
2409
  - name: qwen/qwen1.5-72b
1987
2410
  display_name: Qwen1.5 (72B)
1988
- description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2411
+ description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
1989
2412
  creator_organization_name: Qwen
1990
2413
  access: open
1991
2414
  release_date: 2024-02-05
1992
2415
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1993
2416
 
2417
+ - name: qwen/qwen1.5-7b-chat
2418
+ display_name: Qwen1.5 Chat (7B)
2419
+ description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
2420
+ creator_organization_name: Qwen
2421
+ access: open
2422
+ release_date: 2024-02-05
2423
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2424
+
2425
+ - name: qwen/qwen1.5-14b-chat
2426
+ display_name: Qwen1.5 Chat (14B)
2427
+ description: 14B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
2428
+ creator_organization_name: Qwen
2429
+ access: open
2430
+ release_date: 2024-02-05
2431
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2432
+
2433
+ - name: qwen/qwen1.5-32b-chat
2434
+ display_name: Qwen1.5 Chat (32B)
2435
+ description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
2436
+ creator_organization_name: Qwen
2437
+ access: open
2438
+ release_date: 2024-04-02
2439
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2440
+
2441
+ - name: qwen/qwen1.5-72b-chat
2442
+ display_name: Qwen1.5 Chat (72B)
2443
+ description: 72B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
2444
+ creator_organization_name: Qwen
2445
+ access: open
2446
+ release_date: 2024-02-05
2447
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2448
+
2449
+ - name: qwen/qwen1.5-110b-chat
2450
+ display_name: Qwen1.5 Chat (110B)
2451
+ description: 110B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 110B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-110b/))
2452
+ creator_organization_name: Qwen
2453
+ access: open
2454
+ release_date: 2024-04-25
2455
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2456
+
2457
+ - name: qwen/qwen2-72b-instruct
2458
+ display_name: Qwen2 Instruct (72B)
2459
+ description: 72B-parameter chat version of the large language model series, Qwen2. Qwen2 uses Group Query Attention (GQA) and has extended context length support up to 128K tokens. ([blog](https://qwenlm.github.io/blog/qwen2/))
2460
+ creator_organization_name: Qwen
2461
+ access: open
2462
+ release_date: 2024-06-07
2463
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2464
+
1994
2465
  - name: qwen/qwen-vl
1995
2466
  display_name: Qwen-VL
1996
2467
  description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
@@ -2007,6 +2478,43 @@ models:
2007
2478
  release_date: 2023-08-24
2008
2479
  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2009
2480
 
2481
+ # SAIL (Sea AI Lab)
2482
+ - name: sail/sailor-7b
2483
+ display_name: Sailor (7B)
2484
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
2485
+ creator_organization_name: SAIL
2486
+ access: open
2487
+ num_parameters: 7000000000
2488
+ release_date: 2024-04-04
2489
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2490
+
2491
+ - name: sail/sailor-7b-chat
2492
+ display_name: Sailor Chat (7B)
2493
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
2494
+ creator_organization_name: SAIL
2495
+ access: open
2496
+ num_parameters: 7000000000
2497
+ release_date: 2024-04-04
2498
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2499
+
2500
+ - name: sail/sailor-14b
2501
+ display_name: Sailor (14B)
2502
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
2503
+ creator_organization_name: SAIL
2504
+ access: open
2505
+ num_parameters: 14000000000
2506
+ release_date: 2024-04-04
2507
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2508
+
2509
+ - name: sail/sailor-14b-chat
2510
+ display_name: Sailor Chat (14B)
2511
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
2512
+ creator_organization_name: SAIL
2513
+ access: open
2514
+ num_parameters: 14000000000
2515
+ release_date: 2024-04-04
2516
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2517
+
2010
2518
  # Salesforce
2011
2519
  - name: salesforce/codegen # NOT SUPPORTED
2012
2520
  display_name: CodeGen (16B)
@@ -2017,6 +2525,125 @@ models:
2017
2525
  release_date: 2022-03-25
2018
2526
  tags: [] # TODO: add tags
2019
2527
 
2528
+ # SambaNova
2529
+ - name: sambanova/sambalingo-thai-base
2530
+ display_name: SambaLingo-Thai-Base
2531
+ description: SambaLingo-Thai-Base is a pretrained bi-lingual Thai and English model that adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
2532
+ creator_organization_name: SambaLingo
2533
+ access: open
2534
+ num_parameters: 7000000000
2535
+ release_date: 2024-04-08
2536
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2537
+
2538
+ - name: sambanova/sambalingo-thai-chat
2539
+ display_name: SambaLingo-Thai-Chat
2540
+ description: SambaLingo-Thai-Chat is a chat model trained using direct preference optimization on SambaLingo-Thai-Base. SambaLingo-Thai-Base adapts Llama 2 (7B) to Thai by training on 38 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
2541
+ creator_organization_name: SambaLingo
2542
+ access: open
2543
+ num_parameters: 7000000000
2544
+ release_date: 2024-04-08
2545
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2546
+
2547
+ - name: sambanova/sambalingo-thai-base-70b
2548
+ display_name: SambaLingo-Thai-Base-70B
2549
+ description: SambaLingo-Thai-Base-70B is a pretrained bi-lingual Thai and English model that adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
2550
+ creator_organization_name: SambaLingo
2551
+ access: open
2552
+ num_parameters: 70000000000
2553
+ release_date: 2024-04-08
2554
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2555
+
2556
+ - name: sambanova/sambalingo-thai-chat-70b
2557
+ display_name: SambaLingo-Thai-Chat-70B
2558
+ description: SambaLingo-Thai-Chat-70B is a chat model trained using direct preference optimization on SambaLingo-Thai-Base-70B. SambaLingo-Thai-Base-70B adapts Llama 2 (70B) to Thai by training on 26 billion tokens from the Thai split of the Cultura-X dataset. ([paper](https://arxiv.org/abs/2404.05829))
2559
+ creator_organization_name: SambaLingo
2560
+ access: open
2561
+ num_parameters: 70000000000
2562
+ release_date: 2024-04-08
2563
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2564
+
2565
+ # SCB10X
2566
+ - name: scb10x/typhoon-7b
2567
+ display_name: Typhoon (7B)
2568
+ description: Typhoon (7B) is a pretrained Thai large language model with 7 billion parameters based on Mistral 7B. ([paper](https://arxiv.org/abs/2312.13951))
2569
+ creator_organization_name: SCB10X
2570
+ access: open
2571
+ num_parameters: 7000000000
2572
+ release_date: 2023-12-21
2573
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2574
+
2575
+ - name: scb10x/typhoon-v1.5-8b
2576
+ display_name: Typhoon v1.5 (8B)
2577
+ description: Typhoon v1.5 (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
2578
+ creator_organization_name: SCB10X
2579
+ access: open
2580
+ num_parameters: 8000000000
2581
+ release_date: 2024-05-08
2582
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2583
+
2584
+ - name: scb10x/typhoon-v1.5-8b-instruct
2585
+ display_name: Typhoon v1.5 Instruct (8B)
2586
+ description: Typhoon v1.5 Instruct (8B) is a pretrained Thai large language model with 8 billion parameters based on Llama 3 8B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
2587
+ creator_organization_name: SCB10X
2588
+ access: open
2589
+ num_parameters: 8000000000
2590
+ release_date: 2024-05-08
2591
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2592
+
2593
+ - name: scb10x/typhoon-v1.5-72b
2594
+ display_name: Typhoon v1.5 (72B)
2595
+ description: Typhoon v1.5 (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
2596
+ creator_organization_name: SCB10X
2597
+ access: open
2598
+ num_parameters: 72000000000
2599
+ release_date: 2024-05-08
2600
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2601
+
2602
+ - name: scb10x/typhoon-v1.5-72b-instruct
2603
+ display_name: Typhoon v1.5 Instruct (72B)
2604
+ description: Typhoon v1.5 Instruct (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
2605
+ creator_organization_name: SCB10X
2606
+ access: open
2607
+ num_parameters: 72000000000
2608
+ release_date: 2024-05-08
2609
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2610
+
2611
+ - name: scb10x/llama-3-typhoon-v1.5x-8b-instruct
2612
+ display_name: Typhoon 1.5X instruct (8B)
2613
+ description: Llama-3-Typhoon-1.5X-8B-instruct is an 8 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
2614
+ creator_organization_name: SCB10X
2615
+ access: open
2616
+ num_parameters: 8000000000
2617
+ release_date: 2024-05-29
2618
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2619
+
2620
+ - name: scb10x/llama-3-typhoon-v1.5x-70b-instruct
2621
+ display_name: Typhoon 1.5X instruct (70B)
2622
+ description: Llama-3-Typhoon-1.5X-70B-instruct is a 70 billion parameter instruct model designed for the Thai language based on Llama 3 Instruct. It utilizes the task-arithmetic model editing technique. ([blog](https://blog.opentyphoon.ai/typhoon-1-5x-our-experiment-designed-for-application-use-cases-7b85d9e9845c))
2623
+ creator_organization_name: SCB10X
2624
+ access: open
2625
+ num_parameters: 70000000000
2626
+ release_date: 2024-05-29
2627
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2628
+
2629
+ # Alibaba DAMO Academy
2630
+ - name: damo/seallm-7b-v2
2631
+ display_name: SeaLLM v2 (7B)
2632
+ description: SeaLLM v2 is a multilingual LLM for Southeast Asian (SEA) languages trained from Mistral (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
2633
+ creator_organization_name: Alibaba DAMO Academy
2634
+ access: open
2635
+ num_parameters: 7000000000
2636
+ release_date: 2024-02-02
2637
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2638
+
2639
+ - name: damo/seallm-7b-v2.5
2640
+ display_name: SeaLLM v2.5 (7B)
2641
+ description: SeaLLM v2.5 is a multilingual LLM for Southeast Asian (SEA) languages trained from Gemma (7B). ([website](https://damo-nlp-sg.github.io/SeaLLMs/))
2642
+ creator_organization_name: Alibaba DAMO Academy
2643
+ access: open
2644
+ num_parameters: 7000000000
2645
+ release_date: 2024-04-12
2646
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2020
2647
 
2021
2648
  # Snowflake
2022
2649
  - name: snowflake/snowflake-arctic-instruct
@@ -2187,7 +2814,7 @@ models:
2187
2814
  release_date: 2022-08-04
2188
2815
  # Inference with echo=True is not feasible -- in the prompt encoding phase, they use
2189
2816
  # bidirectional attention and do not perform predictions on them.
2190
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
2817
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG]
2191
2818
 
2192
2819
  - name: tsinghua/codegeex # NOT SUPPORTED
2193
2820
  display_name: CodeGeeX (13B)
@@ -2222,7 +2849,6 @@ models:
2222
2849
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2223
2850
 
2224
2851
  - name: writer/palmyra-instruct-30
2225
- deprecated: true # Internal error
2226
2852
  display_name: InstructPalmyra (30B)
2227
2853
  description: InstructPalmyra (30B parameters) is trained using reinforcement learning techniques based on feedback from humans.
2228
2854
  creator_organization_name: Writer
@@ -2230,10 +2856,9 @@ models:
2230
2856
  num_parameters: 30000000000
2231
2857
  release_date: 2023-02-16
2232
2858
  # Does not support echo
2233
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2859
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2234
2860
 
2235
2861
  - name: writer/palmyra-e
2236
- deprecated: true # Internal error
2237
2862
  display_name: Palmyra E (30B)
2238
2863
  description: Palmyra E (30B)
2239
2864
  creator_organization_name: Writer
@@ -2241,7 +2866,7 @@ models:
2241
2866
  num_parameters: 30000000000
2242
2867
  release_date: 2023-03-03
2243
2868
  # Does not support echo
2244
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2869
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2245
2870
 
2246
2871
  - name: writer/silk-road
2247
2872
  display_name: Silk Road (35B)
@@ -2293,6 +2918,15 @@ models:
2293
2918
  # Does not support echo
2294
2919
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2295
2920
 
2921
+ - name: writer/palmyra-vision-003
2922
+ display_name: Palmyra Vision 003
2923
+ description: Palmyra Vision 003 (internal only)
2924
+ creator_organization_name: Writer
2925
+ access: limited
2926
+ num_parameters: 5000000000
2927
+ release_date: 2024-05-24
2928
+ # Does not support echo
2929
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
2296
2930
 
2297
2931
 
2298
2932
  # Yandex
@@ -2304,3 +2938,65 @@ models:
2304
2938
  num_parameters: 100000000000
2305
2939
  release_date: 2022-06-23
2306
2940
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
2941
+
2942
+ # Reka
2943
+ - name: reka/reka-core
2944
+ display_name: Reka-Core
2945
+ description: Reka-Core
2946
+ creator_organization_name: Reka AI
2947
+ access: limited
2948
+ release_date: 2024-04-18
2949
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2950
+
2951
+ - name: reka/reka-core-20240415
2952
+ display_name: Reka-Core-20240415
2953
+ description: Reka-Core-20240415
2954
+ creator_organization_name: Reka AI
2955
+ access: limited
2956
+ release_date: 2024-04-18
2957
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2958
+
2959
+ - name: reka/reka-core-20240501
2960
+ display_name: Reka-Core-20240501
2961
+ description: Reka-Core-20240501
2962
+ creator_organization_name: Reka AI
2963
+ access: limited
2964
+ release_date: 2024-05-01
2965
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2966
+
2967
+ - name: reka/reka-flash
2968
+ display_name: Reka-Flash (21B)
2969
+ description: Reka-Flash (21B)
2970
+ creator_organization_name: Reka AI
2971
+ access: limited
2972
+ num_parameters: 21000000000
2973
+ release_date: 2024-04-18
2974
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2975
+
2976
+ - name: reka/reka-flash-20240226
2977
+ display_name: Reka-Flash-20240226 (21B)
2978
+ description: Reka-Flash-20240226 (21B)
2979
+ creator_organization_name: Reka AI
2980
+ access: limited
2981
+ num_parameters: 21000000000
2982
+ release_date: 2024-04-18
2983
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2984
+
2985
+ - name: reka/reka-edge
2986
+ display_name: Reka-Edge (7B)
2987
+ description: Reka-Edge (7B)
2988
+ creator_organization_name: Reka AI
2989
+ access: limited
2990
+ num_parameters: 7000000000
2991
+ release_date: 2024-04-18
2992
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2993
+
2994
+ - name: reka/reka-edge-20240208
2995
+ display_name: Reka-Edge-20240208 (7B)
2996
+ description: Reka-Edge-20240208 (7B)
2997
+ creator_organization_name: Reka AI
2998
+ access: limited
2999
+ num_parameters: 7000000000
3000
+ release_date: 2024-04-18
3001
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
3002
+