crfm-helm 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. See the registry's advisory page for more details.

Files changed (606)
  1. crfm_helm-0.5.5.dist-info/METADATA +413 -0
  2. crfm_helm-0.5.5.dist-info/RECORD +894 -0
  3. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  22. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  23. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  24. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  25. helm/benchmark/adaptation/prompt.py +1 -1
  26. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  27. helm/benchmark/annotation/air_bench_annotator.py +20 -5
  28. helm/benchmark/annotation/annotator.py +5 -0
  29. helm/benchmark/annotation/annotator_factory.py +3 -20
  30. helm/benchmark/annotation/anthropic_red_team_annotator.py +11 -24
  31. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  32. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  33. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  34. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  35. helm/benchmark/annotation/call_center_annotator.py +22 -11
  36. helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
  37. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  38. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  39. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  40. helm/benchmark/annotation/harm_bench_annotator.py +11 -24
  41. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  42. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  43. helm/benchmark/annotation/live_qa_annotator.py +10 -5
  44. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  45. helm/benchmark/annotation/medalign_annotator.py +100 -0
  46. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  47. helm/benchmark/annotation/medication_qa_annotator.py +90 -61
  48. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  49. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  50. helm/benchmark/annotation/model_as_judge.py +281 -18
  51. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  52. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  53. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  54. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  55. helm/benchmark/annotation/omni_math_annotator.py +132 -0
  56. helm/benchmark/annotation/simple_safety_tests_annotator.py +11 -25
  57. helm/benchmark/annotation/spider_annotator.py +18 -0
  58. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  59. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  60. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  61. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  62. helm/benchmark/annotation/xstest_annotator.py +20 -30
  63. helm/benchmark/annotation_executor.py +35 -15
  64. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  65. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  66. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  67. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  68. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  69. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  70. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  71. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  72. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  73. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  74. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  75. helm/benchmark/augmentations/perturbation.py +1 -1
  76. helm/benchmark/augmentations/space_perturbation.py +2 -2
  77. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  78. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  79. helm/benchmark/augmentations/test_perturbation.py +16 -13
  80. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  81. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  82. helm/benchmark/data_preprocessor.py +2 -2
  83. helm/benchmark/huggingface_registration.py +2 -7
  84. helm/benchmark/metrics/aci_bench_metrics.py +34 -0
  85. helm/benchmark/metrics/basic_metrics.py +6 -6
  86. helm/benchmark/metrics/bbq_metrics.py +2 -2
  87. helm/benchmark/metrics/bias_metrics.py +12 -3
  88. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  89. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  90. helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
  91. helm/benchmark/metrics/classification_metrics.py +76 -12
  92. helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
  93. helm/benchmark/metrics/code_metrics.py +5 -5
  94. helm/benchmark/metrics/comet_metric.py +125 -0
  95. helm/benchmark/metrics/common_metric_specs.py +9 -2
  96. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  97. helm/benchmark/metrics/copyright_metrics.py +4 -4
  98. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  99. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  100. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  101. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  102. helm/benchmark/metrics/dischargeme_metrics.py +34 -0
  103. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  104. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  105. helm/benchmark/metrics/efficiency_metrics.py +3 -3
  106. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  107. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  108. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  109. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  110. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  111. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  112. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  113. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  114. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  115. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  116. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  117. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  118. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  120. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  121. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  122. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  123. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  124. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  125. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  126. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  127. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  128. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  129. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  130. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  131. helm/benchmark/metrics/med_dialog_metrics.py +34 -0
  132. helm/benchmark/metrics/medalign_metrics.py +34 -0
  133. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  134. helm/benchmark/metrics/medec_metrics.py +101 -0
  135. helm/benchmark/metrics/medi_qa_metrics.py +34 -0
  136. helm/benchmark/metrics/medication_qa_metrics.py +15 -4
  137. helm/benchmark/metrics/mental_health_metrics.py +34 -0
  138. helm/benchmark/metrics/metric.py +3 -3
  139. helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
  140. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  141. helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
  142. helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
  143. helm/benchmark/metrics/nltk_helper.py +32 -0
  144. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  145. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  146. helm/benchmark/metrics/output_processing_metric.py +60 -0
  147. helm/benchmark/metrics/output_processors.py +15 -0
  148. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  149. helm/benchmark/metrics/ranking_metrics.py +3 -3
  150. helm/benchmark/metrics/reference_metric.py +3 -3
  151. helm/benchmark/metrics/safety_metrics.py +39 -17
  152. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  153. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  154. helm/benchmark/metrics/spider_metrics.py +7 -0
  155. helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
  156. helm/benchmark/metrics/statistic.py +1 -1
  157. helm/benchmark/metrics/summac/model_summac.py +1 -1
  158. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  159. helm/benchmark/metrics/summarization_metrics.py +19 -9
  160. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  161. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  162. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  163. helm/benchmark/metrics/test_metric.py +1 -1
  164. helm/benchmark/metrics/test_statistic.py +2 -2
  165. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  166. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  167. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  168. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  169. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  170. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  171. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  172. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
  173. helm/benchmark/metrics/toxicity_metrics.py +4 -4
  174. helm/benchmark/metrics/unitxt_metrics.py +21 -4
  175. helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
  176. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  177. helm/benchmark/model_metadata_registry.py +16 -0
  178. helm/benchmark/presentation/create_plots.py +1 -1
  179. helm/benchmark/presentation/schema.py +3 -0
  180. helm/benchmark/presentation/summarize.py +119 -256
  181. helm/benchmark/presentation/test_summarize.py +145 -3
  182. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  183. helm/benchmark/reeval_run.py +203 -0
  184. helm/benchmark/reeval_runner.py +355 -0
  185. helm/benchmark/run.py +8 -17
  186. helm/benchmark/run_expander.py +105 -8
  187. helm/benchmark/run_spec_factory.py +12 -0
  188. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  189. helm/benchmark/run_specs/audio_run_specs.py +613 -0
  190. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  191. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  192. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  193. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  194. helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
  195. helm/benchmark/run_specs/experimental_run_specs.py +112 -3
  196. helm/benchmark/run_specs/finance_run_specs.py +6 -2
  197. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  198. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  199. helm/benchmark/run_specs/long_context_run_specs.py +89 -0
  200. helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
  201. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  202. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  203. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  204. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +66 -52
  205. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  206. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  207. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  208. helm/benchmark/run_specs/vlm_run_specs.py +83 -5
  209. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  210. helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
  211. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  212. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  213. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  214. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  215. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
  216. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  217. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  218. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  219. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  220. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  221. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  222. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  223. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  224. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  225. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  226. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  227. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  228. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  229. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  230. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  231. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  232. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  233. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  234. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  235. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
  236. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  237. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
  238. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  239. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  240. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  241. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  242. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  243. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  244. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  245. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  246. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  247. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  248. helm/benchmark/scenarios/bold_scenario.py +1 -1
  249. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  250. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  251. helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
  252. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  253. helm/benchmark/scenarios/clear_scenario.py +153 -0
  254. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  255. helm/benchmark/scenarios/code_scenario.py +17 -4
  256. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  257. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  258. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  259. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  260. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  261. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  262. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  263. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  264. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  265. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  266. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  267. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  268. helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
  269. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  270. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  271. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  272. helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
  273. helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
  274. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  275. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  276. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  277. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  278. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  279. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  280. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  281. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  282. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  283. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  284. helm/benchmark/scenarios/headqa_scenario.py +131 -0
  285. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  286. helm/benchmark/scenarios/ice_scenario.py +8 -4
  287. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  288. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  289. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  290. helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
  291. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  292. helm/benchmark/scenarios/koala_scenario.py +1 -1
  293. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  294. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  295. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  296. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  297. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  298. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  299. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  300. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  301. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  302. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  303. helm/benchmark/scenarios/math_scenario.py +9 -1
  304. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  305. helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
  306. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  307. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  308. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  309. helm/benchmark/scenarios/medalign_scenario.py +88 -0
  310. helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
  311. helm/benchmark/scenarios/medbullets_scenario.py +140 -0
  312. helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
  313. helm/benchmark/scenarios/medec_scenario.py +120 -0
  314. helm/benchmark/scenarios/medhallu_scenario.py +66 -0
  315. helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
  316. helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
  317. helm/benchmark/scenarios/mental_health_scenario.py +112 -0
  318. helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
  319. helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
  320. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
  321. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  322. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  323. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  324. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  325. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
  326. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
  327. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
  328. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  329. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  330. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  331. helm/benchmark/scenarios/numeracy_scenario.py +10 -1
  332. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  333. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  334. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  335. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  336. helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
  337. helm/benchmark/scenarios/quac_scenario.py +10 -1
  338. helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
  339. helm/benchmark/scenarios/raft_scenario.py +18 -3
  340. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  341. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  342. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  343. helm/benchmark/scenarios/scenario.py +9 -1
  344. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +233 -84
  345. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  346. helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
  347. helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
  348. helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
  349. helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
  350. helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
  351. helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
  352. helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
  353. helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
  354. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  355. helm/benchmark/scenarios/spider_scenario.py +91 -0
  356. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
  357. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  358. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  359. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  360. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  361. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  362. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  363. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  364. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  365. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  366. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  367. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  368. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  369. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  370. helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
  371. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  372. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  373. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  374. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  375. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  376. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  377. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  378. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  379. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  380. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  381. helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
  382. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  383. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  384. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  385. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  386. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  387. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  388. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  389. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  390. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  391. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  392. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  393. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  394. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  395. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  396. helm/benchmark/server.py +11 -0
  397. helm/benchmark/slurm_runner.py +1 -1
  398. helm/benchmark/static/schema_audio.yaml +752 -0
  399. helm/benchmark/static/schema_autobencher.yaml +150 -0
  400. helm/benchmark/static/schema_call_center.yaml +97 -60
  401. helm/benchmark/static/schema_capabilities.yaml +254 -0
  402. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  403. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  404. helm/benchmark/static/schema_enterprise.yaml +298 -0
  405. helm/benchmark/static/schema_finance.yaml +14 -12
  406. helm/benchmark/static/schema_heim.yaml +1389 -0
  407. helm/benchmark/static/schema_legal.yaml +566 -0
  408. helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
  409. helm/benchmark/static/schema_medhelm.yaml +1081 -0
  410. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  411. helm/benchmark/static/schema_safety.yaml +42 -6
  412. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +40 -26
  413. helm/benchmark/static/schema_social_audio.yaml +224 -0
  414. helm/benchmark/static/schema_sql.yaml +171 -0
  415. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +187 -30
  416. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  417. helm/benchmark/static/schema_vhelm.yaml +151 -47
  418. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  419. helm/benchmark/static_build/assets/index-262903c1.js +10 -0
  420. helm/benchmark/static_build/assets/index-42060d71.css +1 -0
  421. helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
  422. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  423. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  424. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
  425. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  426. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  427. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  428. helm/benchmark/static_build/config.js +1 -1
  429. helm/benchmark/static_build/index.html +5 -5
  430. helm/benchmark/window_services/default_window_service.py +1 -1
  431. helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
  432. helm/benchmark/window_services/ice_window_service.py +1 -1
  433. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  434. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  435. helm/benchmark/window_services/local_window_service.py +2 -2
  436. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  437. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  438. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  439. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  440. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  441. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  442. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  443. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  444. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  445. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  446. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  447. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  448. helm/benchmark/window_services/test_utils.py +1 -1
  449. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  450. helm/benchmark/window_services/tokenizer_service.py +0 -5
  451. helm/benchmark/window_services/yalm_window_service.py +1 -1
  452. helm/clients/ai21_client.py +3 -3
  453. helm/clients/aleph_alpha_client.py +1 -1
  454. helm/clients/audio_language/__init__.py +0 -0
  455. helm/clients/audio_language/diva_llama_client.py +118 -0
  456. helm/clients/audio_language/llama_omni_client.py +198 -0
  457. helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
  458. helm/clients/audio_language/qwen_audiolm_client.py +150 -0
  459. helm/clients/auto_client.py +4 -2
  460. helm/clients/azure_openai_client.py +55 -0
  461. helm/clients/bedrock_client.py +201 -7
  462. helm/clients/bedrock_utils.py +33 -0
  463. helm/clients/clip_scorers/clip_scorer.py +1 -1
  464. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  465. helm/clients/cohere_client.py +3 -3
  466. helm/clients/google_client.py +1 -1
  467. helm/clients/http_model_client.py +1 -1
  468. helm/clients/huggingface_client.py +10 -18
  469. helm/clients/ibm_client.py +267 -0
  470. helm/clients/image_generation/adobe_vision_client.py +1 -1
  471. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  472. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  473. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  474. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  475. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  476. helm/clients/image_generation/cogview2_client.py +1 -1
  477. helm/clients/image_generation/dalle2_client.py +1 -1
  478. helm/clients/image_generation/dalle3_client.py +2 -2
  479. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  480. helm/clients/image_generation/dalle_mini/data.py +1 -1
  481. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  482. helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
  483. helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
  484. helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
  485. helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
  486. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  487. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  488. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  489. helm/clients/image_generation/dalle_mini_client.py +1 -1
  490. helm/clients/image_generation/deep_floyd_client.py +1 -1
  491. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  492. helm/clients/image_generation/lexica_client.py +1 -1
  493. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  494. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  495. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  496. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  497. helm/clients/image_generation/mindalle_client.py +1 -1
  498. helm/clients/image_generation/together_image_generation_client.py +1 -1
  499. helm/clients/lit_gpt_client.py +2 -2
  500. helm/clients/mistral_client.py +62 -18
  501. helm/clients/nvidia_nim_client.py +0 -3
  502. helm/clients/openai_client.py +255 -21
  503. helm/clients/palmyra_client.py +2 -6
  504. helm/clients/reka_client.py +1 -1
  505. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  506. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  507. helm/clients/stanfordhealthcare_google_client.py +43 -0
  508. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  509. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  510. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  511. helm/clients/test_client.py +1 -1
  512. helm/clients/test_together_client.py +6 -1
  513. helm/clients/together_client.py +69 -7
  514. helm/clients/upstage_client.py +23 -0
  515. helm/clients/vertexai_client.py +39 -13
  516. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  517. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  518. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  519. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  520. helm/clients/vision_language/qwen2_vlm_client.py +175 -0
  521. helm/clients/vllm_client.py +4 -6
  522. helm/clients/yi_client.py +0 -3
  523. helm/common/audio_utils.py +111 -0
  524. helm/common/cache.py +8 -30
  525. helm/common/file_caches/local_file_cache.py +1 -1
  526. helm/common/file_caches/test_local_file_cache.py +1 -1
  527. helm/common/images_utils.py +2 -2
  528. helm/common/key_value_store.py +9 -9
  529. helm/common/media_object.py +2 -2
  530. helm/common/mongo_key_value_store.py +3 -3
  531. helm/common/multimodal_request_utils.py +26 -0
  532. helm/common/reeval_parameters.py +12 -0
  533. helm/common/request.py +6 -2
  534. helm/common/response_format.py +18 -0
  535. helm/common/test_cache.py +1 -48
  536. helm/common/test_media_object.py +1 -1
  537. helm/common/tokenization_request.py +0 -9
  538. helm/config/model_deployments.yaml +1258 -33
  539. helm/config/model_metadata.yaml +1110 -41
  540. helm/config/tokenizer_configs.yaml +403 -3
  541. helm/proxy/cli.py +2 -2
  542. helm/proxy/example_queries.py +1 -1
  543. helm/proxy/server.py +11 -13
  544. helm/proxy/services/remote_service.py +1 -7
  545. helm/proxy/services/server_service.py +6 -19
  546. helm/proxy/services/service.py +0 -6
  547. helm/proxy/services/test_remote_service.py +2 -2
  548. helm/proxy/services/test_service.py +1 -1
  549. helm/proxy/static/general.js +122 -0
  550. helm/proxy/static/help.html +99 -0
  551. helm/proxy/static/index.css +57 -0
  552. helm/proxy/static/index.html +40 -0
  553. helm/proxy/static/index.js +456 -0
  554. helm/proxy/static/info-icon.png +0 -0
  555. helm/proxy/test_retry.py +1 -1
  556. helm/proxy/token_counters/auto_token_counter.py +1 -1
  557. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  558. helm/tokenizers/caching_tokenizer.py +2 -30
  559. helm/tokenizers/http_model_tokenizer.py +1 -1
  560. helm/tokenizers/huggingface_tokenizer.py +2 -2
  561. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  562. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  563. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  564. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  565. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  566. helm/tokenizers/tokenizer.py +3 -1
  567. helm/tokenizers/yalm_tokenizer.py +3 -3
  568. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  569. crfm_helm-0.5.3.dist-info/METADATA +0 -355
  570. crfm_helm-0.5.3.dist-info/RECORD +0 -699
  571. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  572. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  573. helm/benchmark/data_overlap/light_scenario.py +0 -60
  574. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  575. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  576. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  577. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  578. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  579. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  580. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  581. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  582. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  583. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  584. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  585. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  586. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  587. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  588. helm/benchmark/static_build/assets/index-58f97dcd.js +0 -10
  589. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  590. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  591. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  592. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  593. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  594. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  595. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  596. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  597. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  598. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  599. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  600. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  601. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  602. helm/tokenizers/anthropic_tokenizer.py +0 -52
  603. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
  604. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
  605. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
  606. /helm/benchmark/{data_overlap → metrics/ifeval}/__init__.py +0 -0
@@ -18,7 +18,7 @@ models:
18
18
  access: open
19
19
  release_date: 2023-01-01
20
20
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
21
-
21
+
22
22
  # Adobe
23
23
  - name: adobe/giga-gan
24
24
  display_name: GigaGAN (1B)
@@ -128,7 +128,7 @@ models:
128
128
 
129
129
  # AI Singapore
130
130
  - name: aisingapore/sea-lion-7b
131
- display_name: SEA-LION (7B)
131
+ display_name: SEA-LION 7B
132
132
  description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
133
133
  creator_organization_name: AI Singapore
134
134
  access: open
@@ -137,7 +137,7 @@ models:
137
137
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
138
138
 
139
139
  - name: aisingapore/sea-lion-7b-instruct
140
- display_name: SEA-LION Instruct (7B)
140
+ display_name: SEA-LION 7B Instruct
141
141
  description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
142
142
  creator_organization_name: AI Singapore
143
143
  access: open
@@ -145,6 +145,77 @@ models:
145
145
  release_date: 2023-02-24
146
146
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
147
147
 
148
+ - name: aisingapore/llama3-8b-cpt-sea-lionv2-base
149
+ display_name: Llama3 8B CPT SEA-LIONv2
150
+ description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
151
+ creator_organization_name: AI Singapore
152
+ access: open
153
+ num_parameters: 8030000000
154
+ release_date: 2024-07-31
155
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
156
+
157
+ - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
158
+ display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct
159
+ description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
160
+ creator_organization_name: AI Singapore
161
+ access: open
162
+ num_parameters: 8030000000
163
+ release_date: 2024-08-21
164
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
165
+
166
+ - name: aisingapore/gemma2-9b-cpt-sea-lionv3-base
167
+ display_name: Gemma2 9B CPT SEA-LIONv3
168
+ description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese.
169
+ creator_organization_name: AI Singapore
170
+ access: open
171
+ num_parameters: 9240000000
172
+ release_date: 2024-10-30
173
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
174
+
175
+ - name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct
176
+ display_name: Gemma2 9B CPT SEA-LIONv3 Instruct
177
+ description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese.
178
+ creator_organization_name: AI Singapore
179
+ access: open
180
+ num_parameters: 9240000000
181
+ release_date: 2024-10-30
182
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
183
+
184
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
185
+ display_name: Llama3.1 8B CPT SEA-LIONv3
186
+ description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesia, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
187
+ creator_organization_name: AI Singapore
188
+ access: open
189
+ num_parameters: 8030000000
190
+ release_date: 2024-12-11
191
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
192
+
193
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
194
+ display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct
195
+ description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese.
196
+ creator_organization_name: AI Singapore
197
+ access: open
198
+ num_parameters: 8030000000
199
+ release_date: 2024-12-11
200
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
201
+
202
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
203
+ display_name: Llama3.1 70B CPT SEA-LIONv3
204
+ description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesia, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
205
+ creator_organization_name: AI Singapore
206
+ access: open
207
+ num_parameters: 70600000000
208
+ release_date: 2024-12-11
209
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
210
+
211
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
212
+ display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct
213
+ description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese.
214
+ creator_organization_name: AI Singapore
215
+ access: open
216
+ num_parameters: 70600000000
217
+ release_date: 2024-12-11
218
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
148
219
 
149
220
  # Aleph Alpha
150
221
  # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
@@ -202,7 +273,34 @@ models:
202
273
  tags: [TEXT_TO_IMAGE_MODEL_TAG]
203
274
 
204
275
 
205
- # Amazon
276
+ # Amazon Nova models
277
+ # References for Amazon Nova models:
278
+ # https://aws.amazon.com/ai/generative-ai/nova/
279
+ - name: amazon/nova-pro-v1:0
280
+ display_name: Amazon Nova Pro
281
+ description: Amazon Nova Pro Model
282
+ creator_organization_name: Amazon
283
+ access: limited
284
+ release_date: 2024-12-03
285
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
286
+
287
+ - name: amazon/nova-lite-v1:0
288
+ display_name: Amazon Nova Lite
289
+ description: Amazon Nova Lite Model
290
+ creator_organization_name: Amazon
291
+ access: limited
292
+ release_date: 2024-12-03
293
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
294
+
295
+ - name: amazon/nova-micro-v1:0
296
+ display_name: Amazon Nova Micro
297
+ description: Amazon Nova Micro Model
298
+ creator_organization_name: Amazon
299
+ access: limited
300
+ release_date: 2024-12-03
301
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
302
+
303
+ # Titan Models
206
304
  # References for Amazon Titan models:
207
305
  # - https://aws.amazon.com/bedrock/titan/
208
306
  # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
@@ -213,16 +311,8 @@ models:
213
311
  creator_organization_name: Amazon
214
312
  access: limited
215
313
  release_date: 2023-11-29
216
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
217
-
218
- - name: amazon/titan-tg1-large
219
- display_name: Amazon Titan Large
220
- description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
221
- creator_organization_name: Amazon
222
- access: limited
223
- release_date: 2023-11-29
224
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
225
-
314
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
315
+
226
316
  - name: amazon/titan-text-express-v1
227
317
  display_name: Amazon Titan Text Express
228
318
  description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
@@ -231,6 +321,93 @@ models:
231
321
  release_date: 2023-11-29
232
322
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
233
323
 
324
+ # Mistral Models on Bedrock
325
+ # References for Mistral on Amazon Bedrock
326
+ # https://aws.amazon.com/bedrock/mistral/
327
+
328
+ - name: mistralai/amazon-mistral-7b-instruct-v0:2
329
+ display_name: Mistral 7B Instruct on Amazon Bedrock
330
+ description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
331
+ creator_organization_name: Mistral
332
+ access: limited
333
+ release_date: 2024-03-23
334
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
335
+
336
+ - name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
337
+ display_name: Mixtral 8x7B Instruct on Amazon Bedrock
338
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
339
+ creator_organization_name: Mistral
340
+ access: limited
341
+ release_date: 2023-12-11
342
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
343
+
344
+ - name: mistralai/amazon-mistral-large-2402-v1:0
345
+ display_name: Mistral Large(2402) on Amazon Bedrock
346
+ description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
347
+ creator_organization_name: Mistral
348
+ access: limited
349
+ release_date: 2023-07-26
350
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
351
+
352
+ - name: mistralai/amazon-mistral-small-2402-v1:0
353
+ display_name: Mistral Small on Amazon Bedrock
354
+ description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
355
+ creator_organization_name: Mistral
356
+ access: limited
357
+ release_date: 2023-02-26
358
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
359
+
360
+ - name: mistralai/amazon-mistral-large-2407-v1:0
361
+ display_name: Mistral Large(2407) on Amazon Bedrock
362
+ description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling JSON outputting and reasoning capabilities.
363
+ creator_organization_name: Mistral
364
+ access: limited
365
+ release_date: 2024-07-24
366
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
367
+
368
+ # Llama3 on Amazon Bedrock
369
+ # References for Llama3 on Amazon Bedrock
370
+ # https://aws.amazon.com/bedrock/llama/
371
+
372
+ - name: meta/amazon-llama3-8b-instruct-v1:0
373
+ display_name: Llama 3 8B Instruct on Amazon Bedrock
374
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
375
+ creator_organization_name: Meta
376
+ access: limited
377
+ release_date: 2024-04-23
378
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
379
+
380
+ - name: meta/amazon-llama3-70b-instruct-v1:0
381
+ display_name: Llama 3 70B Instruct on Amazon Bedrock
382
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.
383
+ creator_organization_name: Meta
384
+ access: limited
385
+ release_date: 2024-04-23
386
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
387
+
388
+ - name: meta/amazon-llama3-1-405b-instruct-v1:0
389
+ display_name: Llama 3.1 405b Instruct on Amazon Bedrock.
390
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
391
+ creator_organization_name: Meta
392
+ access: limited
393
+ release_date: 2024-07-26
394
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
395
+
396
+ - name: meta/amazon-llama3-1-70b-instruct-v1:0
397
+ display_name: Llama 3.1 70b Instruct on Amazon Bedrock.
398
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
399
+ creator_organization_name: Meta
400
+ access: limited
401
+ release_date: 2024-07-26
402
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
403
+
404
+ - name: meta/amazon-llama3-1-8b-instruct-v1:0
405
+ display_name: Llama 3.1 8b Instruct on Amazon Bedrock.
406
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
407
+ creator_organization_name: Meta
408
+ access: limited
409
+ release_date: 2024-07-26
410
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
234
411
 
235
412
  # Anthropic
236
413
  - name: anthropic/claude-v1.3
@@ -298,6 +475,14 @@ models:
298
475
  release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
299
476
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
300
477
 
478
+ - name: anthropic/claude-3-5-haiku-20241022
479
+ display_name: Claude 3.5 Haiku (20241022)
480
+ description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
481
+ creator_organization_name: Anthropic
482
+ access: limited
483
+ release_date: 2024-11-04 # Released after the blog post
484
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
485
+
301
486
  - name: anthropic/claude-3-5-sonnet-20240620
302
487
  display_name: Claude 3.5 Sonnet (20240620)
303
488
  description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
@@ -306,6 +491,22 @@ models:
306
491
  release_date: 2024-06-20
307
492
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
308
493
 
494
+ - name: anthropic/claude-3-5-sonnet-20241022
495
+ display_name: Claude 3.5 Sonnet (20241022)
496
+ description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
497
+ creator_organization_name: Anthropic
498
+ access: limited
499
+ release_date: 2024-10-22
500
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
501
+
502
+ - name: anthropic/claude-3-7-sonnet-20250219
503
+ display_name: Claude 3.7 Sonnet (20250219)
504
+ description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)).
505
+ creator_organization_name: Anthropic
506
+ access: limited
507
+ release_date: 2025-02-24
508
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
509
+
309
510
  - name: anthropic/stanford-online-all-v4-s3
310
511
  display_name: Anthropic-LM v4-s3 (52B)
311
512
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -325,7 +526,7 @@ models:
325
526
  access: open
326
527
  num_parameters: 13000000000
327
528
  release_date: 2022-04-03
328
- tags: [] # TODO: add tags
529
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
329
530
 
330
531
 
331
532
 
@@ -346,7 +547,7 @@ models:
346
547
  access: open
347
548
  num_parameters: 176000000000
348
549
  release_date: 2022-11-03
349
- tags: [] # TODO: add tags
550
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
350
551
 
351
552
  - name: bigscience/t0pp
352
553
  display_name: T0pp (11B)
@@ -401,7 +602,7 @@ models:
401
602
  access: limited
402
603
  num_parameters: 6700000000
403
604
  release_date: 2023-04-06
404
- tags: [] # TODO: add tags
605
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
405
606
 
406
607
  - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
407
608
  display_name: Cerebras GPT (13B)
@@ -410,7 +611,7 @@ models:
410
611
  access: limited
411
612
  num_parameters: 13000000000
412
613
  release_date: 2023-04-06
413
- tags: [] # TODO: add tags
614
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
414
615
 
415
616
 
416
617
 
@@ -627,7 +828,7 @@ models:
627
828
  access: closed
628
829
  num_parameters: 280000000000
629
830
  release_date: 2021-12-08
630
- tags: [] # TODO: add tags
831
+ tags: [UNSUPPORTED_MODEL_TAG]
631
832
 
632
833
  - name: deepmind/chinchilla # NOT SUPPORTED
633
834
  display_name: Chinchilla (70B)
@@ -636,7 +837,7 @@ models:
636
837
  access: closed
637
838
  num_parameters: 70000000000
638
839
  release_date: 2022-03-31
639
- tags: [] # TODO: add tags
840
+ tags: [UNSUPPORTED_MODEL_TAG]
640
841
 
641
842
 
642
843
  # Deepseek
@@ -649,7 +850,36 @@ models:
649
850
  release_date: 2024-01-05
650
851
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
651
852
 
853
+ - name: deepseek-ai/deepseek-v3
854
+ display_name: DeepSeek v3
855
+ description: DeepSeek v3 a Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. It adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
856
+ creator_organization_name: DeepSeek
857
+ access: open
858
+ # NOTE: The total size of DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights.
859
+ num_parameters: 685000000000
860
+ release_date: 2024-12-24
861
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
862
+
863
+ - name: deepseek-ai/deepseek-r1
864
+ display_name: DeepSeek R1
865
+ description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948))
866
+ creator_organization_name: DeepSeek
867
+ access: open
868
+ # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
869
+ num_parameters: 685000000000
870
+ release_date: 2025-01-20
871
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
652
872
 
873
+ - name: deepseek-ai/deepseek-r1-hide-reasoning
874
+ display_name: DeepSeek R1 (hide reasoning)
875
+ description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incoporates which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model.
876
+ creator_organization_name: DeepSeek
877
+ access: open
878
+ # NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
879
+ num_parameters: 685000000000
880
+ release_date: 2025-01-20
881
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
882
+
653
883
  # EleutherAI
654
884
  - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
655
885
  display_name: GPT-J (6B)
@@ -752,7 +982,7 @@ models:
752
982
  access: closed
753
983
  num_parameters: 540000000000
754
984
  release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
755
- tags: [] # TODO: add tags
985
+ tags: [UNSUPPORTED_MODEL_TAG]
756
986
 
757
987
  # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
758
988
  - name: google/gemini-pro
@@ -802,7 +1032,7 @@ models:
802
1032
  creator_organization_name: Google
803
1033
  access: limited
804
1034
  release_date: 2024-05-24
805
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1035
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
806
1036
 
807
1037
  - name: google/gemini-1.5-flash-001
808
1038
  display_name: Gemini 1.5 Flash (001)
@@ -810,7 +1040,7 @@ models:
810
1040
  creator_organization_name: Google
811
1041
  access: limited
812
1042
  release_date: 2024-05-24
813
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1043
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
814
1044
 
815
1045
  - name: google/gemini-1.5-pro-preview-0409
816
1046
  display_name: Gemini 1.5 Pro (0409 preview)
@@ -868,6 +1098,70 @@ models:
868
1098
  release_date: 2024-05-24
869
1099
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
870
1100
 
1101
+ - name: google/gemini-1.5-pro-002
1102
+ display_name: Gemini 1.5 Pro (002)
1103
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
1104
+ creator_organization_name: Google
1105
+ access: limited
1106
+ release_date: 2024-09-24
1107
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1108
+
1109
+ - name: google/gemini-1.5-flash-002
1110
+ display_name: Gemini 1.5 Flash (002)
1111
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
1112
+ creator_organization_name: Google
1113
+ access: limited
1114
+ release_date: 2024-09-24
1115
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1116
+
1117
+ - name: google/gemini-2.0-flash-exp
1118
+ display_name: Gemini 2.0 Flash (Experimental)
1119
+ description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash))
1120
+ creator_organization_name: Google
1121
+ access: limited
1122
+ release_date: 2024-12-11
1123
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1124
+
1125
+ - name: google/gemini-1.5-flash-8b-001
1126
+ display_name: Gemini 1.5 Flash 8B
1127
+ description: Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks. ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1128
+ creator_organization_name: Google
1129
+ access: limited
1130
+ release_date: 2024-10-01
1131
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1132
+
1133
+ - name: google/gemini-2.0-flash-001
1134
+ display_name: Gemini 2.0 Flash
1135
+ description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1136
+ creator_organization_name: Google
1137
+ access: limited
1138
+ release_date: 2025-02-01
1139
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1140
+
1141
+ - name: google/gemini-2.0-flash-lite-preview-02-05
1142
+ display_name: Gemini 2.0 Flash Lite (02-05 preview)
1143
+ description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1144
+ creator_organization_name: Google
1145
+ access: limited
1146
+ release_date: 2025-02-05
1147
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1148
+
1149
+ - name: google/gemini-2.0-flash-thinking-exp-01-21
1150
+ display_name: Gemini 2.0 Flash Thinking (01-21 preview)
1151
+ description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
1152
+ creator_organization_name: Google
1153
+ access: limited
1154
+ release_date: 2025-01-21
1155
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1156
+
1157
+ - name: google/gemini-2.0-pro-exp-02-05
1158
+ display_name: Gemini 2.0 Pro (02-05 preview)
1159
+ description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1160
+ creator_organization_name: Google
1161
+ access: limited
1162
+ release_date: 2025-02-05
1163
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1164
+
871
1165
  - name: google/gemma-2b
872
1166
  display_name: Gemma (2B)
873
1167
  description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
@@ -1287,7 +1581,7 @@ models:
1287
1581
  access: open
1288
1582
  num_parameters: 175000000000
1289
1583
  release_date: 2022-12-22
1290
- tags: [] # TODO: add tags
1584
+ tags: [UNSUPPORTED_MODEL_TAG]
1291
1585
 
1292
1586
  - name: meta/opt-iml-30b # NOT SUPPORTED
1293
1587
  display_name: OPT-IML (30B)
@@ -1296,7 +1590,7 @@ models:
1296
1590
  access: open
1297
1591
  num_parameters: 30000000000
1298
1592
  release_date: 2022-12-22
1299
- tags: [] # TODO: add tags
1593
+ tags: [UNSUPPORTED_MODEL_TAG]
1300
1594
 
1301
1595
  - name: meta/opt-175b
1302
1596
  display_name: OPT (175B)
@@ -1343,7 +1637,7 @@ models:
1343
1637
  access: open
1344
1638
  num_parameters: 120000000000
1345
1639
  release_date: 2022-11-15
1346
- tags: [] # TODO: add tags
1640
+ tags: [UNSUPPORTED_MODEL_TAG]
1347
1641
 
1348
1642
  - name: meta/galactica-30b # NOT SUPPORTED
1349
1643
  display_name: Galactica (30B)
@@ -1352,7 +1646,7 @@ models:
1352
1646
  access: open
1353
1647
  num_parameters: 30000000000
1354
1648
  release_date: 2022-11-15
1355
- tags: [] # TODO: add tags
1649
+ tags: [UNSUPPORTED_MODEL_TAG]
1356
1650
 
1357
1651
  - name: meta/llama-7b
1358
1652
  display_name: LLaMA (7B)
@@ -1427,6 +1721,24 @@ models:
1427
1721
  num_parameters: 8000000000
1428
1722
  release_date: 2024-04-18
1429
1723
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1724
+
1725
+ - name: meta/llama-3-8b-instruct-turbo
1726
+ display_name: Llama 3 Instruct Turbo (8B)
1727
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
1728
+ creator_organization_name: Meta
1729
+ access: open
1730
+ num_parameters: 8000000000
1731
+ release_date: 2024-07-18
1732
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1733
+
1734
+ - name: meta/llama-3-8b-instruct-lite
1735
+ display_name: Llama 3 Instruct Lite (8B)
1736
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation, it leverages a number of optimizations including INT4 quantization, provides the most cost-efficient and scalable Llama 3 models available anywhere, while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
1737
+ creator_organization_name: Meta
1738
+ access: open
1739
+ num_parameters: 8000000000
1740
+ release_date: 2024-07-18
1741
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1430
1742
 
1431
1743
  - name: meta/llama-3-70b
1432
1744
  display_name: Llama 3 (70B)
@@ -1436,6 +1748,51 @@ models:
1436
1748
  num_parameters: 70000000000
1437
1749
  release_date: 2024-04-18
1438
1750
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1751
+
1752
+ - name: meta/llama-3-70b-instruct-turbo
1753
+ display_name: Llama 3 Instruct Turbo (70B)
1754
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
1755
+ creator_organization_name: Meta
1756
+ access: open
1757
+ num_parameters: 70000000000
1758
+ release_date: 2024-07-18
1759
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1760
+
1761
+ - name: meta/llama-3-70b-instruct-lite
1762
+ display_name: Llama 3 Instruct Lite (70B)
1763
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation, it leverages a number of optimizations including INT4 quantization, provides the most cost-efficient and scalable Llama 3 models available anywhere, while maintaining excellent quality relative to full precision reference implementations ([blog](https://www.together.ai/blog/together-inference-engine-2))
1764
+ creator_organization_name: Meta
1765
+ access: open
1766
+ num_parameters: 70000000000
1767
+ release_date: 2024-07-18
1768
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1769
+
1770
+ - name: meta/llama-3.1-8b-instruct
1771
+ display_name: Llama 3.1 Instruct (8B)
1772
+ description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1773
+ creator_organization_name: Meta
1774
+ access: open
1775
+ num_parameters: 8000000000
1776
+ release_date: 2024-07-23
1777
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1778
+
1779
+ - name: meta/llama-3.1-70b-instruct
1780
+ display_name: Llama 3.1 Instruct (70B)
1781
+ description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1782
+ creator_organization_name: Meta
1783
+ access: open
1784
+ num_parameters: 70000000000
1785
+ release_date: 2024-07-23
1786
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1787
+
1788
+ - name: meta/llama-3.1-405b-instruct
1789
+ display_name: Llama 3.1 Instruct (405B)
1790
+ description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1791
+ creator_organization_name: Meta
1792
+ access: open
1793
+ num_parameters: 405000000000
1794
+ release_date: 2024-07-23
1795
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1439
1796
 
1440
1797
  - name: meta/llama-3.1-8b-instruct-turbo
1441
1798
  display_name: Llama 3.1 Instruct Turbo (8B)
@@ -1444,7 +1801,7 @@ models:
1444
1801
  access: open
1445
1802
  num_parameters: 8000000000
1446
1803
  release_date: 2024-07-23
1447
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1804
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1448
1805
 
1449
1806
  - name: meta/llama-3.1-70b-instruct-turbo
1450
1807
  display_name: Llama 3.1 Instruct Turbo (70B)
@@ -1453,7 +1810,7 @@ models:
1453
1810
  access: open
1454
1811
  num_parameters: 70000000000
1455
1812
  release_date: 2024-07-23
1456
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1813
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1457
1814
 
1458
1815
  - name: meta/llama-3.1-405b-instruct-turbo
1459
1816
  display_name: Llama 3.1 Instruct Turbo (405B)
@@ -1462,7 +1819,61 @@ models:
1462
1819
  access: open
1463
1820
  num_parameters: 405000000000
1464
1821
  release_date: 2024-07-23
1465
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1822
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1823
+
1824
+ - name: meta/llama-3.2-1b-instruct
1825
+ display_name: Llama 3.2 Instruct (1.23B)
1826
+ description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/))
1827
+ creator_organization_name: Meta
1828
+ access: open
1829
+ num_parameters: 1230000000
1830
+ release_date: 2024-09-25
1831
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1832
+
1833
+ - name: meta/llama-3.2-3b-instruct-turbo
1834
+ display_name: Llama 3.2 Instruct Turbo (3B)
1835
+ description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1836
+ creator_organization_name: Meta
1837
+ access: open
1838
+ num_parameters: 3210000000
1839
+ release_date: 2024-09-25
1840
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1841
+
1842
+ - name: meta/llama-3.2-11b-vision-instruct-turbo
1843
+ display_name: Llama 3.2 Vision Instruct Turbo (11B)
1844
+ description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1845
+ creator_organization_name: Meta
1846
+ access: open
1847
+ num_parameters: 10700000000
1848
+ release_date: 2024-09-25
1849
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1850
+
1851
+ - name: meta/llama-3.2-90b-vision-instruct-turbo
1852
+ display_name: Llama 3.2 Vision Instruct Turbo (90B)
1853
+ description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1854
+ creator_organization_name: Meta
1855
+ access: open
1856
+ num_parameters: 88600000000
1857
+ release_date: 2024-09-25
1858
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1859
+
1860
+ - name: meta/llama-3.3-70b-instruct-turbo
1861
+ display_name: Llama 3.3 Instruct Turbo (70B)
1862
+ description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1863
+ creator_organization_name: Meta
1864
+ access: open
1865
+ num_parameters: 70000000000
1866
+ release_date: 2024-12-06
1867
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1868
+
1869
+ - name: meta/llama-3.3-70b-instruct
1870
+ display_name: Llama 3.3 Instruct (70B)
1871
+ description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1872
+ creator_organization_name: Meta
1873
+ access: open
1874
+ num_parameters: 70000000000
1875
+ release_date: 2024-12-06
1876
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1466
1877
 
1467
1878
  - name: meta/llama-3-8b-chat
1468
1879
  display_name: Llama 3 Instruct (8B)
@@ -1510,9 +1921,6 @@ models:
1510
1921
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1511
1922
 
1512
1923
 
1513
-
1514
-
1515
-
1516
1924
  # Microsoft/NVIDIA
1517
1925
  - name: microsoft/TNLGv2_530B
1518
1926
  display_name: TNLG v2 (530B)
@@ -1621,6 +2029,24 @@ models:
1621
2029
  num_parameters: 14000000000
1622
2030
  release_date: 2024-05-21
1623
2031
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2032
+
2033
+ - name: microsoft/phi-3.5-mini-instruct
2034
+ display_name: Phi-3.5-mini-instruct (3.8B)
2035
+ description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
2036
+ creator_organization_name: Microsoft
2037
+ access: open
2038
+ num_parameters: 3800000000
2039
+ release_date: 2024-08-22
2040
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2041
+
2042
+ - name: microsoft/phi-3.5-moe-instruct
2043
+ display_name: Phi-3.5 MoE
2044
+ description: Phi-3.5 MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents - with a focus on very high-quality, reasoning dense data. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
2045
+ creator_organization_name: Microsoft
2046
+ access: open
2047
+ num_parameters: 41900000000
2048
+ release_date: 2024-08-22
2049
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1624
2050
 
1625
2051
  # KAIST AI
1626
2052
  - name: kaistai/prometheus-vision-13b-v1.0-hf
@@ -1760,6 +2186,15 @@ models:
1760
2186
  num_parameters: 7300000000
1761
2187
  release_date: 2024-05-22
1762
2188
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2189
+
2190
+ - name: mistralai/mistral-7b-instruct-v0.3-hf
2191
+ display_name: Mistral Instruct v0.3 (7B)
2192
+ description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.3 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
2193
+ creator_organization_name: Mistral AI
2194
+ access: open
2195
+ num_parameters: 7300000000
2196
+ release_date: 2024-05-22
2197
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1763
2198
 
1764
2199
  - name: mistralai/mixtral-8x7b-32kseqlen
1765
2200
  display_name: Mixtral (8x7B 32K seqlen)
@@ -1807,6 +2242,22 @@ models:
1807
2242
  release_date: 2023-10-16
1808
2243
  tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1809
2244
 
2245
+ - name: mistralai/ministral-3b-2410
2246
+ display_name: Ministral 3B (2410)
2247
+ description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)).
2248
+ creator_organization_name: Mistral AI
2249
+ access: limited
2250
+ release_date: 2024-10-16
2251
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2252
+
2253
+ - name: mistralai/ministral-8b-2410
2254
+ display_name: Ministral 8B (2410)
2255
+ description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases with a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)).
2256
+ creator_organization_name: Mistral AI
2257
+ access: open
2258
+ release_date: 2024-10-16
2259
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2260
+
1810
2261
  - name: mistralai/mistral-small-2402
1811
2262
  display_name: Mistral Small (2402)
1812
2263
  description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -1815,6 +2266,32 @@ models:
1815
2266
  release_date: 2024-02-26
1816
2267
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1817
2268
 
2269
+ - name: mistralai/mistral-small-2409
2270
+ display_name: Mistral Small (2409)
2271
+ description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
2272
+ creator_organization_name: Mistral AI
2273
+ access: limited
2274
+ release_date: 2024-09-18
2275
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2276
+
2277
+ - name: mistralai/mistral-small-2501
2278
+ display_name: Mistral Small 3 (2501)
2279
+ description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks—those that require robust language and instruction following performance, with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/))
2280
+ creator_organization_name: Mistral AI
2281
+ access: open
2282
+ num_parameters: 23600000000
2283
+ release_date: 2025-01-30
2284
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2285
+
2286
+ - name: mistralai/mistral-small-2503
2287
+ display_name: Mistral Small 3.1 (2503)
2288
+ description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1))
2289
+ creator_organization_name: Mistral AI
2290
+ access: open
2291
+ num_parameters: 23600000000
2292
+ release_date: 2025-03-17
2293
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2294
+
1818
2295
  - name: mistralai/mistral-medium-2312
1819
2296
  display_name: Mistral Medium (2312)
1820
2297
  description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
@@ -1840,6 +2317,15 @@ models:
1840
2317
  release_date: 2023-07-24
1841
2318
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1842
2319
 
2320
+ - name: mistralai/mistral-large-2411
2321
+ display_name: Mistral Large (2411)
2322
+ description: Mistral Large (2411) is a 123B parameter model that has a 128k context window. ([blog](https://mistral.ai/news/pixtral-large/))
2323
+ creator_organization_name: Mistral AI
2324
+ access: open
2325
+ num_parameters: 123000000000
2326
+ release_date: 2024-11-18
2327
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2328
+
1843
2329
  - name: mistralai/open-mistral-nemo-2407
1844
2330
  display_name: Mistral NeMo (2407)
1845
2331
  description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
@@ -1848,6 +2334,24 @@ models:
1848
2334
  release_date: 2024-07-18
1849
2335
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1850
2336
 
2337
+ - name: mistralai/pixtral-12b-2409
2338
+ display_name: Mistral Pixtral (2409)
2339
+ description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/))
2340
+ creator_organization_name: Mistral AI
2341
+ access: open
2342
+ release_date: 2024-09-17
2343
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2344
+
2345
+ - name: mistralai/pixtral-large-2411
2346
+ display_name: Mistral Pixtral Large (2411)
2347
+ description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/))
2348
+ creator_organization_name: Mistral AI
2349
+ access: open
2350
+ num_parameters: 124000000000
2351
+ release_date: 2024-11-18
2352
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2353
+
2354
+
1851
2355
  # MosaicML
1852
2356
  - name: mosaicml/mpt-7b
1853
2357
  display_name: MPT (7B)
@@ -1865,7 +2369,7 @@ models:
1865
2369
  access: open
1866
2370
  num_parameters: 6700000000
1867
2371
  release_date: 2023-05-05
1868
- tags: [] # TODO: add tags
2372
+ tags: [UNSUPPORTED_MODEL_TAG]
1869
2373
 
1870
2374
  - name: mosaicml/mpt-instruct-7b
1871
2375
  display_name: MPT-Instruct (7B)
@@ -1892,7 +2396,7 @@ models:
1892
2396
  access: open
1893
2397
  num_parameters: 30000000000
1894
2398
  release_date: 2023-06-22
1895
- tags: [] # TODO: add tags
2399
+ tags: [UNSUPPORTED_MODEL_TAG]
1896
2400
 
1897
2401
  - name: mosaicml/mpt-instruct-30b
1898
2402
  display_name: MPT-Instruct (30B)
@@ -1904,6 +2408,27 @@ models:
1904
2408
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1905
2409
 
1906
2410
 
2411
+
2412
+ # NECTEC
2413
+ - name: nectec/Pathumma-llm-text-1.0.0
2414
+ display_name: Pathumma-llm-text-1.0.0 (7B)
2415
+ description: Pathumma-llm-text-1.0.0 (7B) is an instruction model from OpenThaiLLM-Prebuilt-7B ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
2416
+ creator_organization_name: nectec
2417
+ access: open
2418
+ num_parameters: 7620000000
2419
+ release_date: 2024-10-28
2420
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2421
+
2422
+ - name: nectec/OpenThaiLLM-Prebuilt-7B
2423
+ display_name: OpenThaiLLM-Prebuilt-7B (7B)
2424
+ description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
2425
+ creator_organization_name: nectec
2426
+ access: open
2427
+ num_parameters: 7620000000
2428
+ release_date: 2024-10-28
2429
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2430
+
2431
+
1907
2432
 
1908
2433
  # Neurips
1909
2434
  - name: neurips/local
@@ -1933,6 +2458,16 @@ models:
1933
2458
  release_date: 2024-06-17
1934
2459
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1935
2460
 
2461
+ - name: nvidia/llama-3.1-nemotron-70b-instruct
2462
+ display_name: Llama 3.1 Nemotron Instruct (70B)
2463
+ description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
2464
+ creator_organization_name: NVIDIA
2465
+ access: open
2466
+ num_parameters: 70000000000
2467
+ release_date: 2024-10-02
2468
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2469
+
2470
+
1936
2471
  # OpenAI
1937
2472
 
1938
2473
  ## GPT 2 Models
@@ -2117,7 +2652,7 @@ models:
2117
2652
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2118
2653
 
2119
2654
 
2120
- ## GPT 4 Models
2655
+ ## GPT-4 and GPT-4 Turbo
2121
2656
 
2122
2657
  - name: openai/gpt-4-1106-preview
2123
2658
  display_name: GPT-4 Turbo (1106 preview)
@@ -2169,6 +2704,8 @@ models:
2169
2704
  release_date: 2024-01-25
2170
2705
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2171
2706
 
2707
+ ## GPT-4o
2708
+
2172
2709
  - name: openai/gpt-4-turbo-2024-04-09
2173
2710
  display_name: GPT-4 Turbo (2024-04-09)
2174
2711
  description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
@@ -2193,6 +2730,14 @@ models:
2193
2730
  release_date: 2024-08-06
2194
2731
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2195
2732
 
2733
+ - name: openai/gpt-4o-2024-11-20
2734
+ display_name: GPT-4o (2024-11-20)
2735
+ description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
2736
+ creator_organization_name: OpenAI
2737
+ access: limited
2738
+ release_date: 2024-11-20
2739
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2740
+
2196
2741
  - name: openai/gpt-4o-mini-2024-07-18
2197
2742
  display_name: GPT-4o mini (2024-07-18)
2198
2743
  description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
@@ -2201,6 +2746,40 @@ models:
2201
2746
  release_date: 2024-07-18
2202
2747
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2203
2748
 
2749
+ - name: openai/whisper-1_gpt-4o-2024-11-20
2750
+ display_name: Whisper-1 + GPT-4o (2024-11-20)
2751
+ description: Transcribes the audio with Whisper-1 and then uses GPT-4o to generate a response.
2752
+ creator_organization_name: OpenAI
2753
+ access: limited
2754
+ release_date: 2024-11-20
2755
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
2756
+
2757
+ - name: openai/gpt-4o-audio-preview-2024-10-01
2758
+ display_name: GPT-4o Audio (Preview 2024-10-01)
2759
+ description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
2760
+ creator_organization_name: OpenAI
2761
+ access: limited
2762
+ release_date: 2024-10-01
2763
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2764
+
2765
+ - name: openai/gpt-4o-audio-preview-2024-12-17
2766
+ display_name: GPT-4o Audio (Preview 2024-12-17)
2767
+ description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
2768
+ creator_organization_name: OpenAI
2769
+ access: limited
2770
+ release_date: 2024-12-17
2771
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2772
+
2773
+ - name: openai/gpt-4o-mini-audio-preview-2024-12-17
2774
+ display_name: GPT-4o mini Audio (Preview 2024-12-17)
2775
+ description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
2776
+ creator_organization_name: OpenAI
2777
+ access: limited
2778
+ release_date: 2024-12-17
2779
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2780
+
2781
+ # GPT-4V
2782
+
2204
2783
  - name: openai/gpt-4-vision-preview
2205
2784
  # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
2206
2785
  display_name: GPT-4V (1106 preview)
@@ -2218,6 +2797,80 @@ models:
2218
2797
  release_date: 2023-11-06
2219
2798
  tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2220
2799
 
2800
+ ## GPT-4.5
2801
+ - name: openai/gpt-4.5-preview-2025-02-27
2802
+ display_name: GPT-4.5 (2025-02-27 preview)
2803
+ description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). ([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
2804
+ creator_organization_name: OpenAI
2805
+ access: limited
2806
+ release_date: 2025-02-27
2807
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2808
+
2809
+ ## o1 Models
2810
+ - name: openai/o1-2024-12-17
2811
+ display_name: o1 (2024-12-17)
2812
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
2813
+ creator_organization_name: OpenAI
2814
+ access: limited
2815
+ release_date: 2024-12-17
2816
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2817
+
2818
+ - name: openai/o1-2024-12-17-low-reasoning-effort
2819
+ display_name: o1 (2024-12-17, low reasoning effort)
2820
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to low.
2821
+ creator_organization_name: OpenAI
2822
+ access: limited
2823
+ release_date: 2024-12-17
2824
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2825
+
2826
+ - name: openai/o1-2024-12-17-high-reasoning-effort
2827
+ display_name: o1 (2024-12-17, high reasoning effort)
2828
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to high.
2829
+ creator_organization_name: OpenAI
2830
+ access: limited
2831
+ release_date: 2024-12-17
2832
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2833
+
2834
+ - name: openai/o1-preview-2024-09-12
2835
+ display_name: o1-preview (2024-09-12)
2836
+ description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
2837
+ creator_organization_name: OpenAI
2838
+ access: limited
2839
+ release_date: 2024-09-12
2840
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2841
+
2842
+ - name: openai/o1-mini-2024-09-12
2843
+ display_name: o1-mini (2024-09-12)
2844
+ description: o1-mini is a cost-effective reasoning model for applications that require reasoning without broad world knowledge. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/))
2845
+ creator_organization_name: OpenAI
2846
+ access: limited
2847
+ release_date: 2024-09-12
2848
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2849
+
2850
+ - name: openai/o3-mini-2025-01-31
2851
+ display_name: o3-mini (2025-01-31)
2852
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
2853
+ creator_organization_name: OpenAI
2854
+ access: limited
2855
+ release_date: 2025-01-31
2856
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2857
+
2858
+ - name: openai/o3-mini-2025-01-31-low-reasoning-effort
2859
+ display_name: o3-mini (2025-01-31, low reasoning effort)
2860
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The request's reasoning effort parameter is set to low.
2861
+ creator_organization_name: OpenAI
2862
+ access: limited
2863
+ release_date: 2025-01-31
2864
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2865
+
2866
+ - name: openai/o3-mini-2025-01-31-high-reasoning-effort
2867
+ display_name: o3-mini (2025-01-31, high reasoning effort)
2868
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The request's reasoning effort parameter is set to high.
2869
+ creator_organization_name: OpenAI
2870
+ access: limited
2871
+ release_date: 2025-01-31
2872
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2873
+
2221
2874
  ## Codex Models
2222
2875
  # DEPRECATED: Codex models have been shut down on March 23 2023.
2223
2876
 
@@ -2462,6 +3115,39 @@ models:
2462
3115
  release_date: 2024-06-07
2463
3116
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2464
3117
 
3118
+ - name: qwen/qwen2.5-7b-instruct-turbo
3119
+ display_name: Qwen2.5 Instruct Turbo (7B)
3120
+ description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3121
+ creator_organization_name: Qwen
3122
+ access: open
3123
+ release_date: 2024-09-19
3124
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3125
+
3126
+ - name: qwen/qwen2.5-7b-instruct
3127
+ display_name: Qwen2.5 Instruct (7B)
3128
+ description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3129
+ creator_organization_name: Qwen
3130
+ access: open
3131
+ release_date: 2024-09-19
3132
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3133
+
3134
+ - name: qwen/qwen2.5-72b-instruct-turbo
3135
+ display_name: Qwen2.5 Instruct Turbo (72B)
3136
+ description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3137
+ creator_organization_name: Qwen
3138
+ access: open
3139
+ release_date: 2024-09-19
3140
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3141
+
3142
+ - name: qwen/qwq-32b-preview
3143
+ display_name: QwQ (32B Preview)
3144
+ description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
3145
+ creator_organization_name: Alibaba Cloud
3146
+ access: open
3147
+ num_parameters: 32800000000
3148
+ release_date: 2024-11-28
3149
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3150
+
2465
3151
  - name: qwen/qwen-vl
2466
3152
  display_name: Qwen-VL
2467
3153
  description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
@@ -2478,6 +3164,38 @@ models:
2478
3164
  release_date: 2023-08-24
2479
3165
  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2480
3166
 
3167
+ - name: qwen/qwen2-vl-7b-instruct
3168
+ display_name: Qwen2-VL Instruct (7B)
3169
+ description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
3170
+ creator_organization_name: Alibaba Group
3171
+ access: open
3172
+ release_date: 2024-08-29
3173
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3174
+
3175
+ - name: qwen/qwen2-vl-72b-instruct
3176
+ display_name: Qwen2-VL Instruct (72B)
3177
+ description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
3178
+ creator_organization_name: Alibaba Group
3179
+ access: open
3180
+ release_date: 2024-08-29
3181
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3182
+
3183
+ - name: qwen/qwen-audio-chat
3184
+ display_name: Qwen-Audio Chat
3185
+ description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
3186
+ creator_organization_name: Alibaba Cloud
3187
+ access: open
3188
+ release_date: 2023-11-14
3189
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3190
+
3191
+ - name: qwen/qwen2-audio-7b-instruct
3192
+ display_name: Qwen2-Audio Instruct (7B)
3193
+ description: The second version of auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)).
3194
+ creator_organization_name: Alibaba Cloud
3195
+ access: open
3196
+ release_date: 2024-07-15
3197
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3198
+
2481
3199
  # SAIL (Sea AI Lab)
2482
3200
  - name: sail/sailor-7b
2483
3201
  display_name: Sailor (7B)
@@ -2523,7 +3241,7 @@ models:
2523
3241
  access: open
2524
3242
  num_parameters: 16000000000
2525
3243
  release_date: 2022-03-25
2526
- tags: [] # TODO: add tags
3244
+ tags: [UNSUPPORTED_MODEL_TAG]
2527
3245
 
2528
3246
  # SambaNova
2529
3247
  - name: sambanova/sambalingo-thai-base
@@ -2675,8 +3393,6 @@ models:
2675
3393
  release_date: 2023-04-20
2676
3394
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2677
3395
 
2678
-
2679
-
2680
3396
  # Stanford
2681
3397
  - name: stanford/alpaca-7b
2682
3398
  display_name: Alpaca (7B)
@@ -2772,7 +3488,7 @@ models:
2772
3488
  access: open
2773
3489
  num_parameters: 3000000000
2774
3490
  release_date: 2023-05-05
2775
- tafs: [] # TODO: add tags
3491
+ tags: [UNSUPPORTED_MODEL_TAG]
2776
3492
 
2777
3493
  - name: together/redpajama-incite-base-7b
2778
3494
  display_name: RedPajama-INCITE-Base (7B)
@@ -2823,9 +3539,27 @@ models:
2823
3539
  access: open
2824
3540
  num_parameters: 13000000000
2825
3541
  release_date: 2022-09-19
2826
- tags: [] # TODO: add tags
3542
+ tags: [UNSUPPORTED_MODEL_TAG]
2827
3543
 
3544
+ # Upstage
3545
+ - name: upstage/solar-pro-preview-instruct
3546
+ display_name: Solar Pro Preview (22B)
3547
+ description: Solar Pro Preview (22B) is open-weights model for single GPU inference that is a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)).
3548
+ creator_organization_name: Upstage
3549
+ access: open
3550
+ num_parameters: 22000000000
3551
+ release_date: 2024-09-11
3552
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2828
3553
 
3554
+ - name: upstage/solar-pro-241126
3555
+ display_name: Solar Pro
3556
+ display_name: Solar Pro
3557
+ description: Solar Pro is a LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws)).
3558
+ creator_organization_name: Upstage
3559
+ access: limited
3560
+ num_parameters: 22000000000
3561
+ release_date: 2024-11-26
3562
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2829
3563
 
2830
3564
  # Writer
2831
3565
  - name: writer/palmyra-base
@@ -2928,6 +3662,58 @@ models:
2928
3662
  # Does not support echo
2929
3663
  tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
2930
3664
 
3665
+ - name: writer/palmyra-x-004
3666
+ display_name: Palmyra-X-004
3667
+ description: Palmyra-X-004 language model with a large context window of up to 128,000 tokens that excels in processing and understanding complex tasks.
3668
+ creator_organization_name: Writer
3669
+ access: limited
3670
+ release_date: 2024-09-12
3671
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3672
+
3673
+ - name: writer/palmyra-med-32k
3674
+ display_name: Palmyra-Med 32K (70B)
3675
+ description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
3676
+ creator_organization_name: Writer
3677
+ access: open
3678
+ num_parameters: 70600000000
3679
+ release_date: 2024-07-31
3680
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3681
+
3682
+ - name: writer/palmyra-med
3683
+ display_name: Palmyra-Med (70B)
3684
+ description: Palmyra-Med (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
3685
+ creator_organization_name: Writer
3686
+ access: open
3687
+ num_parameters: 70600000000
3688
+ release_date: 2024-07-31
3689
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3690
+
3691
+ - name: writer/palmyra-fin-32k
3692
+ display_name: Palmyra-Fin 32K (70B)
3693
+ description: Palmyra-Fin 32K (70B) is a model finetuned from Palmyra-X-003 intended for financial applications.
3694
+ creator_organization_name: Writer
3695
+ access: open
3696
+ num_parameters: 70600000000
3697
+ release_date: 2024-07-31
3698
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3699
+
3700
+ - name: writer/palmyra-fin
3701
+ display_name: Palmyra Fin
3702
+ description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
3703
+ creator_organization_name: Writer
3704
+ access: limited
3705
+ release_date: 2024-07-31
3706
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3707
+
3708
+ # xAI
3709
+
3710
+ - name: xai/grok-beta
3711
+ display_name: Grok Beta
3712
+ description: Grok Beta is a model from xAI.
3713
+ creator_organization_name: xAI
3714
+ access: closed
3715
+ release_date: 2024-08-13
3716
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2931
3717
 
2932
3718
  # Yandex
2933
3719
  - name: yandex/yalm
@@ -3000,3 +3786,286 @@ models:
3000
3786
  release_date: 2024-04-18
3001
3787
  tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
3002
3788
 
3789
+ # Diva Llama
3790
+ - name: stanford/diva-llama
3791
+ display_name: Diva Llama 3 (8B)
3792
+ description: Diva Llama 3 is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It was trained using distillation loss. ([paper](https://arxiv.org/abs/2410.02678))
3793
+ creator_organization_name: Stanford
3794
+ access: open
3795
+ num_parameters: 8000000000
3796
+ release_date: 2024-10-03
3797
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3798
+
3799
+
3800
+ # LLaMA-Omni
3801
+ - name: ictnlp/llama-3.1-8b-omni
3802
+ display_name: LLaMA-Omni (8B)
3803
+ description: The audio-visual multimodal version of the LLaMA 3.1 model ([paper](https://arxiv.org/abs/2409.06666)).
3804
+ creator_organization_name: ICTNLP
3805
+ access: open
3806
+ num_parameters: 8000000000
3807
+ release_date: 2024-09-10
3808
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3809
+
3810
+ # Granite - IBM
3811
+ # https://www.ibm.com/granite
3812
+ # https://github.com/ibm-granite/granite-3.0-language-models
3813
+
3814
+ - name: ibm-granite/granite-3.0-2b-base
3815
+ display_name: Granite 3.0 base (2B)
3816
+ description: Granite-3.0-2B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
3817
+ creator_organization_name: IBM
3818
+ access: open
3819
+ num_parameters: 2530000000
3820
+ release: 2024-10-21
3821
+ tags: [TEXT_MODEL_TAG]
3822
+
3823
+ - name: ibm-granite/granite-3.0-2b-instruct
3824
+ display_name: Granite 3.0 Instruct (2B)
3825
+ description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3826
+ creator_organization_name: IBM
3827
+ access: open
3828
+ num_parameters: 2630000000
3829
+ release: 2024-10-21
3830
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3831
+
3832
+ - name: ibm-granite/granite-3.0-8b-instruct
3833
+ display_name: Granite 3.0 instruct (8B)
3834
+ description: Granite-3.0-8B-Instruct is an 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3835
+ creator_organization_name: IBM
3836
+ access: open
3837
+ num_parameters: 8170000000
3838
+ release: 2024-10-21
3839
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3840
+
3841
+ - name: ibm-granite/granite-3.0-8b-base
3842
+ display_name: Granite 3.0 base (8B)
3843
+ description: Granite-3.0-8B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
3844
+ creator_organization_name: IBM
3845
+ access: open
3846
+ num_parameters: 8170000000
3847
+ release: 2024-10-21
3848
+ tags: [TEXT_MODEL_TAG]
3849
+
3850
+ - name: ibm-granite/granite-3.0-3b-a800m-instruct
3851
+ display_name: Granite 3.0 A800M instruct (3B)
3852
+ description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3853
+ creator_organization_name: IBM
3854
+ access: open
3855
+ num_parameters: 3370000000
3856
+ release: 2024-10-21
3857
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3858
+
3859
+ - name: ibm-granite/granite-3.0-3b-a800m-base
3860
+ display_name: Granite 3.0 A800M base (3B)
3861
+ description: Granite-3.0-3B-A800M-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
3862
+ creator_organization_name: IBM
3863
+ access: open
3864
+ num_parameters: 3370000000
3865
+ release: 2024-10-21
3866
+ tags: [TEXT_MODEL_TAG]
3867
+
3868
+ - name: ibm-granite/granite-3.0-1b-a400m-instruct
3869
+ display_name: Granite 3.0 A400M instruct (1B)
3870
+ description: Granite-3.0-1B-A400M-Instruct is an 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3871
+ creator_organization_name: IBM
3872
+ access: open
3873
+ num_parameters: 1330000000
3874
+ release: 2024-10-21
3875
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3876
+
3877
+ - name: ibm-granite/granite-3.0-1b-a400m-base
3878
+ display_name: Granite 3.0 A400M base (1B)
3879
+ description: Granite-3.0-1B-A400M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy.
3880
+ creator_organization_name: IBM
3881
+ access: open
3882
+ num_parameters: 1380000000
3883
+ release: 2024-10-21
3884
+ tags: [TEXT_MODEL_TAG]
3885
+
3886
+ - name: maritaca-ai/sabia-7b
3887
+ display_name: Sabia 7B
3888
+ description: Sabia 7B
3889
+ creator_organization_name: MARITACA-AI
3890
+ access: open
3891
+ num_parameters: 6740000000
3892
+ release_date: 2023-11-08
3893
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3894
+
3895
+ # Granite-3.1-8b-base
3896
+ - name: ibm-granite/granite-3.1-8b-base
3897
+ display_name: Granite 3.1 - 8B - Base
3898
+ description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3899
+ creator_organization_name: IBM-GRANITE
3900
+ access: open
3901
+ num_parameters: 8170000000
3902
+ release_date: 2024-12-18
3903
+ tags: [TEXT_MODEL_TAG]
3904
+
3905
+ # Granite-3.1-8b-instruct
3906
+ - name: ibm-granite/granite-3.1-8b-instruct
3907
+ display_name: Granite 3.1 - 8B - Instruct
3908
+ description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3909
+ creator_organization_name: IBM
3910
+ access: open
3911
+ num_parameters: 8170000000
3912
+ release_date: 2024-12-18
3913
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3914
+
3915
+ # Granite-3.1-2b-instruct
3916
+ - name: ibm-granite/granite-3.1-2b-instruct
3917
+ display_name: Granite 3.1 - 2B - Instruct
3918
+ description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3919
+ creator_organization_name: IBM
3920
+ access: open
3921
+ num_parameters: 2530000000
3922
+ release_date: 2024-12-18
3923
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3924
+
3925
+ # Granite-3.1-2b-base
3926
+ - name: ibm-granite/granite-3.1-2b-base
3927
+ display_name: Granite 3.1 - 2B - Base
3928
+ description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3929
+ creator_organization_name: IBM-GRANITE
3930
+ access: open
3931
+ num_parameters: 2530000000
3932
+ release_date: 2024-12-18
3933
+ tags: [TEXT_MODEL_TAG]
3934
+
3935
+ # Granite-3.1-3b-a800m-instruct
3936
+ - name: ibm-granite/granite-3.1-3b-a800m-instruct
3937
+ display_name: Granite 3.1 - 3B - A800M - Instruct
3938
+ description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3939
+ creator_organization_name: IBM-GRANITE
3940
+ access: open
3941
+ num_parameters: 3300000000
3942
+ release_date: 2024-12-18
3943
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3944
+
3945
+ # Granite-3.1-3b-a800m-base
3946
+ - name: ibm-granite/granite-3.1-3b-a800m-base
3947
+ display_name: Granite 3.1 - 3B - A800M - Base
3948
+ description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3949
+ creator_organization_name: IBM-GRANITE
3950
+ access: open
3951
+ num_parameters: 3300000000
3952
+ release_date: 2024-12-18
3953
+ tags: [TEXT_MODEL_TAG]
3954
+
3955
+ # Granite-3.1-1b-a400m-instruct
3956
+ - name: ibm-granite/granite-3.1-1b-a400m-instruct
3957
+ display_name: Granite 3.1 - 1B - A400M - Instruct
3958
+ description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3959
+ creator_organization_name: IBM-GRANITE
3960
+ access: open
3961
+ num_parameters: 1330000000
3962
+ release_date: 2024-12-18
3963
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3964
+
3965
+ # Granite-3.1-1b-a400m-base
3966
+ - name: ibm-granite/granite-3.1-1b-a400m-base
3967
+ display_name: Granite 3.1 - 1B - A400M - Base
3968
+ description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3969
+ creator_organization_name: IBM-GRANITE
3970
+ access: open
3971
+ num_parameters: 1330000000
3972
+ release_date: 2024-12-18
3973
+ tags: [TEXT_MODEL_TAG]
3974
+
3975
+ # DeepSeek-R1-Distill-Llama-3.1-8b
3976
+ - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
3977
+ display_name: DeepSeek-R1-Distill-Llama-8b
3978
+ description: DeepSeek-R1-Distill-Llama-8b is a model that is distilled from LLaMA 8B model for the DeepSeek-R1 task.
3979
+ creator_organization_name: DeepSeek
3980
+ access: open
3981
+ num_parameters: 8000000000
3982
+ release_date: 2025-01-20
3983
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3984
+
3985
+ # deepseek-ai/deepseek-coder-6.7b-instruct
3986
+ - name: deepseek-ai/deepseek-coder-6.7b-instruct
3987
+ display_name: DeepSeek-Coder-6.7b-Instruct
3988
+ description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the LLaMA 6.7B model for the DeepSeek-Coder task.
3989
+ creator_organization_name: DeepSeek
3990
+ access: open
3991
+ num_parameters: 6740000000
3992
+ release_date: 2025-01-20
3993
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3994
+
3995
+ # WatsonX - IBM
3996
+ - name: ibm/granite-13b-instruct-v2
3997
+ display_name: Granite 13b instruct v2
3998
+ description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer model. The following features were used in the design of the model: decoder-only architecture.
3999
+ creator_organization_name: IBM
4000
+ access: limited
4001
+ num_parameters: 13000000000
4002
+ release: 2023-11-30
4003
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4004
+
4005
+ - name: ibm/granite-20b-code-instruct-8k
4006
+ display_name: Granite 20b code instruct (8K)
4007
+ description: Granite-20B-Code-Base-8K is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions.
4008
+ creator_organization_name: IBM
4009
+ access: limited
4010
+ num_parameters: 20000000000
4011
+ release: 2024-4-18
4012
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4013
+
4014
+ - name: ibm/granite-34b-code-instruct
4015
+ display_name: Granite 34b code instruct
4016
+ description: Granite Base (34B) Code Instruct is a 34B parameter model fine tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
4017
+ creator_organization_name: IBM
4018
+ access: open
4019
+ num_parameters: 34000000000
4020
+ release: 2024-6-5
4021
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4022
+
4023
+
4024
+ - name: ibm/granite-3b-code-instruct
4025
+ display_name: Granite 3b code instruct
4026
+ description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
4027
+ creator_organization_name: IBM
4028
+ access: open
4029
+ num_parameters: 3000000000
4030
+ release_date: 2024-06-18
4031
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4032
+
4033
+ - name: ibm/granite-8b-code-instruct
4034
+ display_name: Granite 8b code instruct
4035
+ description: Granite-8B-Code-Instruct-128K is an 8B parameter long-context instruct model fine tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
4036
+ creator_organization_name: IBM
4037
+ access: open
4038
+ num_parameters: 8000000000
4039
+ release_date: 2024-06-18
4040
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4041
+
4042
+
4043
+
4044
+
4045
+
4046
+
4047
+ - name: ibm/granite-3.1-8b-instruct
4048
+ display_name: Granite 3.1 - 8B - Instruct
4049
+ description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4050
+ creator_organization_name: IBM
4051
+ access: open
4052
+ num_parameters: 8170000000
4053
+ release_date: 2024-12-18
4054
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4055
+
4056
+ - name: ibm/granite-3.1-2b-instruct
4057
+ display_name: Granite 3.1 - 2B - Instruct
4058
+ description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4059
+ creator_organization_name: IBM
4060
+ access: open
4061
+ num_parameters: 2530000000
4062
+ release_date: 2024-12-18
4063
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4064
+
4065
+ - name: mistralai/mixtral-8x7b-instruct-v0:1
4066
+ display_name: Mixtral 8x7B Instruct on IBM WatsonX
4067
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
4068
+ creator_organization_name: Mistral
4069
+ access: limited
4070
+ release_date: 2023-12-11
4071
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]