crfm-helm 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (652)
  1. crfm_helm-0.5.6.dist-info/METADATA +427 -0
  2. crfm_helm-0.5.6.dist-info/RECORD +941 -0
  3. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +4 -4
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_adapter.py +4 -4
  22. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  23. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  24. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  25. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  26. helm/benchmark/adaptation/prompt.py +1 -1
  27. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  28. helm/benchmark/annotation/air_bench_annotator.py +21 -6
  29. helm/benchmark/annotation/annotator.py +5 -0
  30. helm/benchmark/annotation/annotator_factory.py +3 -20
  31. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  32. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  33. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  34. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  35. helm/benchmark/annotation/chw_care_plan_annotator.py +93 -0
  36. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  37. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  38. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  39. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  40. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  41. helm/benchmark/annotation/live_qa_annotator.py +1 -1
  42. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  43. helm/benchmark/annotation/medalign_annotator.py +100 -0
  44. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  45. helm/benchmark/annotation/medication_qa_annotator.py +87 -63
  46. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  47. helm/benchmark/annotation/mimic_bhc_annotator.py +100 -0
  48. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  49. helm/benchmark/annotation/model_as_judge.py +214 -6
  50. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  51. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  52. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  53. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  54. helm/benchmark/annotation/omni_math_annotator.py +131 -0
  55. helm/benchmark/annotation/spider_annotator.py +18 -0
  56. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  57. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  58. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  59. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  60. helm/benchmark/annotation_executor.py +35 -15
  61. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  62. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  63. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  64. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  65. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  66. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  67. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  68. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  69. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  70. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  71. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  72. helm/benchmark/augmentations/perturbation.py +1 -1
  73. helm/benchmark/augmentations/space_perturbation.py +2 -2
  74. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  75. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  76. helm/benchmark/augmentations/test_perturbation.py +16 -13
  77. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  78. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  79. helm/benchmark/data_preprocessor.py +2 -2
  80. helm/benchmark/executor.py +11 -12
  81. helm/benchmark/huggingface_registration.py +2 -7
  82. helm/benchmark/metrics/aci_bench_metrics.py +14 -0
  83. helm/benchmark/metrics/basic_metrics.py +6 -6
  84. helm/benchmark/metrics/bbq_metrics.py +2 -2
  85. helm/benchmark/metrics/bias_metrics.py +12 -3
  86. helm/benchmark/metrics/bias_word_lists.py +1 -1
  87. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  88. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  89. helm/benchmark/metrics/chw_care_plan_metrics.py +14 -0
  90. helm/benchmark/metrics/classification_metrics.py +76 -12
  91. helm/benchmark/metrics/cleva_harms_metrics.py +10 -9
  92. helm/benchmark/metrics/code_metrics.py +5 -5
  93. helm/benchmark/metrics/comet_metric.py +125 -0
  94. helm/benchmark/metrics/common_metric_specs.py +9 -2
  95. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  96. helm/benchmark/metrics/copyright_metrics.py +4 -4
  97. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  98. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  99. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  100. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  101. helm/benchmark/metrics/dischargeme_metrics.py +14 -0
  102. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  103. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  104. helm/benchmark/metrics/efficiency_metrics.py +6 -6
  105. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  106. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  107. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  108. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  109. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  110. helm/benchmark/metrics/gpt4_audio_refusal_metrics.py +145 -0
  111. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  112. helm/benchmark/metrics/ifeval/__init__.py +0 -0
  113. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  114. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  115. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  116. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  117. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  118. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  120. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  121. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  122. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  123. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  124. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  125. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  126. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  127. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  128. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  129. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  130. helm/benchmark/metrics/kpi_edgar_metrics.py +121 -0
  131. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  132. helm/benchmark/metrics/llm_jury_metrics.py +46 -0
  133. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  134. helm/benchmark/metrics/med_dialog_metrics.py +14 -0
  135. helm/benchmark/metrics/medalign_metrics.py +14 -0
  136. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  137. helm/benchmark/metrics/medec_metrics.py +101 -0
  138. helm/benchmark/metrics/medi_qa_metrics.py +14 -0
  139. helm/benchmark/metrics/medication_qa_metrics.py +10 -19
  140. helm/benchmark/metrics/melt_bias_metric.py +234 -0
  141. helm/benchmark/metrics/melt_bias_word_lists.py +1367 -0
  142. helm/benchmark/metrics/melt_metric_specs.py +43 -0
  143. helm/benchmark/metrics/melt_toxicity_metric.py +107 -0
  144. helm/benchmark/metrics/mental_health_metrics.py +14 -0
  145. helm/benchmark/metrics/metric.py +3 -3
  146. helm/benchmark/metrics/metric_service.py +11 -11
  147. helm/benchmark/metrics/mimic_bhc_metrics.py +14 -0
  148. helm/benchmark/metrics/mimic_rrs_metrics.py +14 -0
  149. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  150. helm/benchmark/metrics/mtsamples_procedures_metrics.py +14 -0
  151. helm/benchmark/metrics/mtsamples_replicate_metrics.py +14 -0
  152. helm/benchmark/metrics/nltk_helper.py +32 -0
  153. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  154. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  155. helm/benchmark/metrics/openai_mrcr_metrics.py +52 -0
  156. helm/benchmark/metrics/output_processing_metric.py +60 -0
  157. helm/benchmark/metrics/output_processors.py +15 -0
  158. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  159. helm/benchmark/metrics/ranking_metrics.py +3 -3
  160. helm/benchmark/metrics/reference_metric.py +3 -3
  161. helm/benchmark/metrics/ruler_qa_metrics.py +34 -0
  162. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  163. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  164. helm/benchmark/metrics/spider_metrics.py +7 -0
  165. helm/benchmark/metrics/starr_patient_instructions_metrics.py +14 -0
  166. helm/benchmark/metrics/statistic.py +1 -1
  167. helm/benchmark/metrics/summac/model_summac.py +2 -3
  168. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  169. helm/benchmark/metrics/summarization_metrics.py +20 -9
  170. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  171. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  172. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  173. helm/benchmark/metrics/test_metric.py +1 -1
  174. helm/benchmark/metrics/test_statistic.py +2 -2
  175. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  176. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  177. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  178. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  179. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  180. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  181. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  182. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +3 -3
  183. helm/benchmark/metrics/toxicity_metrics.py +6 -6
  184. helm/benchmark/metrics/unitxt_metrics.py +7 -5
  185. helm/benchmark/metrics/vision_language/emd_utils.py +4 -2
  186. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  187. helm/benchmark/metrics/vision_language/image_utils.py +2 -2
  188. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  189. helm/benchmark/model_deployment_registry.py +6 -8
  190. helm/benchmark/model_metadata_registry.py +16 -0
  191. helm/benchmark/presentation/contamination.py +3 -3
  192. helm/benchmark/presentation/create_plots.py +33 -12
  193. helm/benchmark/presentation/run_display.py +13 -0
  194. helm/benchmark/presentation/schema.py +2 -1
  195. helm/benchmark/presentation/summarize.py +97 -67
  196. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  197. helm/benchmark/reeval_run.py +202 -0
  198. helm/benchmark/reeval_runner.py +355 -0
  199. helm/benchmark/run.py +86 -90
  200. helm/benchmark/run_expander.py +90 -9
  201. helm/benchmark/run_spec_factory.py +13 -0
  202. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  203. helm/benchmark/run_specs/audio_run_specs.py +657 -0
  204. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  205. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  206. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  207. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  208. helm/benchmark/run_specs/enterprise_run_specs.py +280 -0
  209. helm/benchmark/run_specs/experimental_run_specs.py +142 -3
  210. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  211. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  212. helm/benchmark/run_specs/long_context_run_specs.py +141 -0
  213. helm/benchmark/run_specs/medhelm_run_specs.py +1260 -0
  214. helm/benchmark/run_specs/melt_run_specs.py +783 -0
  215. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  216. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  217. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  218. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
  219. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +169 -0
  220. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  221. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  222. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  223. helm/benchmark/run_specs/vlm_run_specs.py +103 -2
  224. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  225. helm/benchmark/runner.py +5 -5
  226. helm/benchmark/scenarios/aci_bench_scenario.py +126 -0
  227. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  228. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  229. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  230. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  231. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +130 -0
  232. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  233. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  234. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  235. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  236. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  237. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  238. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  239. helm/benchmark/scenarios/audio_language/corebench_scenario.py +77 -0
  240. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  241. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  242. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  243. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  244. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  245. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  246. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  247. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  248. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  249. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  250. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  251. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  252. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification.py +103 -0
  253. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +110 -0
  254. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +78 -0
  255. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +109 -0
  256. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +83 -0
  257. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  258. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +105 -0
  259. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  260. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  261. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  262. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  263. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  264. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  265. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  266. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  267. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  268. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  269. helm/benchmark/scenarios/bold_scenario.py +1 -1
  270. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  271. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  272. helm/benchmark/scenarios/chw_care_plan_scenario.py +106 -0
  273. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  274. helm/benchmark/scenarios/clear_scenario.py +157 -0
  275. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  276. helm/benchmark/scenarios/code_scenario.py +17 -4
  277. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  278. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  279. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  280. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  281. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  282. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  283. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  284. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  285. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  286. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  287. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  288. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  289. helm/benchmark/scenarios/dischargeme_scenario.py +172 -0
  290. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  291. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  292. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  293. helm/benchmark/scenarios/ehr_sql_scenario.py +137 -0
  294. helm/benchmark/scenarios/ehrshot_scenario.py +1519 -0
  295. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  296. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  297. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  298. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  299. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  300. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  301. helm/benchmark/scenarios/grammar.py +2 -2
  302. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  303. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  304. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  305. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  306. helm/benchmark/scenarios/headqa_scenario.py +136 -0
  307. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  308. helm/benchmark/scenarios/ice_scenario.py +8 -4
  309. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  310. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  311. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  312. helm/benchmark/scenarios/infinite_bench_en_qa_scenario.py +85 -0
  313. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +79 -0
  314. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  315. helm/benchmark/scenarios/koala_scenario.py +1 -1
  316. helm/benchmark/scenarios/kpi_edgar_scenario.py +151 -0
  317. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  318. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  319. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  320. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  321. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  322. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  323. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  324. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  325. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  326. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  327. helm/benchmark/scenarios/math_scenario.py +9 -1
  328. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  329. helm/benchmark/scenarios/med_dialog_scenario.py +25 -22
  330. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  331. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  332. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  333. helm/benchmark/scenarios/medalign_scenario.py +94 -0
  334. helm/benchmark/scenarios/medalign_scenario_helper.py +432 -0
  335. helm/benchmark/scenarios/medbullets_scenario.py +145 -0
  336. helm/benchmark/scenarios/medcalc_bench_scenario.py +127 -0
  337. helm/benchmark/scenarios/medec_scenario.py +125 -0
  338. helm/benchmark/scenarios/medhallu_scenario.py +72 -0
  339. helm/benchmark/scenarios/medi_qa_scenario.py +111 -0
  340. helm/benchmark/scenarios/medication_qa_scenario.py +8 -2
  341. helm/benchmark/scenarios/melt_ir_scenario.py +171 -0
  342. helm/benchmark/scenarios/melt_knowledge_scenario.py +246 -0
  343. helm/benchmark/scenarios/melt_lm_scenarios.py +252 -0
  344. helm/benchmark/scenarios/melt_scenarios.py +793 -0
  345. helm/benchmark/scenarios/melt_srn_scenario.py +342 -0
  346. helm/benchmark/scenarios/melt_synthetic_reasoning_scenario.py +222 -0
  347. helm/benchmark/scenarios/melt_translation_scenario.py +152 -0
  348. helm/benchmark/scenarios/mental_health_scenario.py +123 -0
  349. helm/benchmark/scenarios/mimic_bhc_scenario.py +103 -0
  350. helm/benchmark/scenarios/mimic_rrs_scenario.py +98 -0
  351. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +77 -0
  352. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  353. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  354. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  355. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  356. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +144 -0
  357. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +142 -0
  358. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +277 -0
  359. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  360. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  361. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  362. helm/benchmark/scenarios/numeracy_scenario.py +12 -2
  363. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  364. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  365. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  366. helm/benchmark/scenarios/openai_mrcr_scenario.py +79 -0
  367. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  368. helm/benchmark/scenarios/pubmed_qa_scenario.py +59 -43
  369. helm/benchmark/scenarios/quac_scenario.py +10 -1
  370. helm/benchmark/scenarios/race_based_med_scenario.py +152 -0
  371. helm/benchmark/scenarios/raft_scenario.py +17 -2
  372. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  373. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  374. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  375. helm/benchmark/scenarios/scenario.py +9 -1
  376. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
  377. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  378. helm/benchmark/scenarios/shc_bmt_scenario.py +75 -0
  379. helm/benchmark/scenarios/shc_cdi_scenario.py +75 -0
  380. helm/benchmark/scenarios/shc_conf_scenario.py +76 -0
  381. helm/benchmark/scenarios/shc_ent_scenario.py +77 -0
  382. helm/benchmark/scenarios/shc_gip_scenario.py +74 -0
  383. helm/benchmark/scenarios/shc_privacy_scenario.py +78 -0
  384. helm/benchmark/scenarios/shc_proxy_scenario.py +76 -0
  385. helm/benchmark/scenarios/shc_ptbm_scenario.py +81 -0
  386. helm/benchmark/scenarios/shc_sei_scenario.py +94 -0
  387. helm/benchmark/scenarios/shc_sequoia_scenario.py +77 -0
  388. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  389. helm/benchmark/scenarios/spider_scenario.py +91 -0
  390. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +97 -0
  391. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  392. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  393. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  394. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  395. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  396. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  397. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  398. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  399. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  400. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  401. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  402. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  403. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  404. helm/benchmark/scenarios/test_infinite_bench_en_qa_scenario.py +18 -0
  405. helm/benchmark/scenarios/test_infinite_bench_en_sum_scenario.py +31 -0
  406. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  407. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  408. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  409. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  410. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  411. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  412. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  413. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  414. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  415. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  416. helm/benchmark/scenarios/truthful_qa_scenario.py +12 -2
  417. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  418. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  419. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  420. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  421. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  422. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  423. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  424. helm/benchmark/scenarios/vision_language/msr_vtt_scenario.py +75 -0
  425. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  426. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  427. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  428. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  429. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  430. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  431. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  432. helm/benchmark/server.py +13 -1
  433. helm/benchmark/slurm_runner.py +1 -1
  434. helm/benchmark/static/schema_audio.yaml +763 -0
  435. helm/benchmark/static/schema_autobencher.yaml +150 -0
  436. helm/benchmark/static/schema_call_center.yaml +97 -60
  437. helm/benchmark/static/{schema_medical.yaml → schema_capabilities.yaml} +100 -101
  438. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  439. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  440. helm/benchmark/static/schema_enterprise.yaml +319 -0
  441. helm/benchmark/static/schema_finance.yaml +14 -12
  442. helm/benchmark/static/schema_heim.yaml +1389 -0
  443. helm/benchmark/static/schema_long_context.yaml +283 -0
  444. helm/benchmark/static/schema_medhelm.yaml +1140 -0
  445. helm/benchmark/static/schema_melt.yaml +1257 -0
  446. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  447. helm/benchmark/static/schema_safety.yaml +18 -1
  448. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
  449. helm/benchmark/static/schema_slphelm.yaml +162 -0
  450. helm/benchmark/static/schema_social_audio.yaml +224 -0
  451. helm/benchmark/static/schema_sql.yaml +171 -0
  452. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
  453. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  454. helm/benchmark/static/schema_vhelm.yaml +129 -56
  455. helm/benchmark/static/schema_video.yaml +219 -0
  456. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  457. helm/benchmark/static_build/assets/index-94295e78.js +10 -0
  458. helm/benchmark/static_build/assets/index-b9779128.css +1 -0
  459. helm/benchmark/static_build/assets/medhelm-overview-eac29843.png +0 -0
  460. helm/benchmark/static_build/assets/medhelm-v1-overview-3ddfcd65.png +0 -0
  461. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  462. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  463. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-38a10867.js} +2 -2
  464. helm/benchmark/static_build/config.js +1 -1
  465. helm/benchmark/static_build/index.html +6 -6
  466. helm/benchmark/window_services/default_window_service.py +1 -1
  467. helm/benchmark/window_services/encoder_decoder_window_service.py +4 -4
  468. helm/benchmark/window_services/ice_window_service.py +1 -1
  469. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  470. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  471. helm/benchmark/window_services/local_window_service.py +2 -2
  472. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  473. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  474. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  475. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  476. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  477. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  478. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  479. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  480. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  481. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  482. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  483. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  484. helm/benchmark/window_services/test_utils.py +4 -5
  485. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  486. helm/benchmark/window_services/tokenizer_service.py +7 -8
  487. helm/benchmark/window_services/yalm_window_service.py +1 -1
  488. helm/clients/ai21_client.py +3 -3
  489. helm/clients/aleph_alpha_client.py +1 -1
  490. helm/clients/anthropic_client.py +69 -29
  491. helm/clients/audio_language/__init__.py +0 -0
  492. helm/clients/audio_language/diva_llama_client.py +120 -0
  493. helm/clients/audio_language/llama_omni_client.py +198 -0
  494. helm/clients/audio_language/qwen2_5_omni_client.py +197 -0
  495. helm/clients/audio_language/qwen2_audiolm_client.py +190 -0
  496. helm/clients/audio_language/qwen_audiolm_client.py +152 -0
  497. helm/clients/audio_language/test.py +62 -0
  498. helm/clients/auto_client.py +4 -2
  499. helm/clients/azure_openai_client.py +55 -0
  500. helm/clients/bedrock_client.py +203 -7
  501. helm/clients/bedrock_utils.py +33 -0
  502. helm/clients/client.py +7 -7
  503. helm/clients/clip_scorers/clip_scorer.py +1 -1
  504. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  505. helm/clients/cohere_client.py +3 -3
  506. helm/clients/google_client.py +1 -1
  507. helm/clients/grok_client.py +36 -0
  508. helm/clients/http_model_client.py +1 -1
  509. helm/clients/huggingface_client.py +52 -21
  510. helm/clients/huggingface_pipeline_client.py +138 -0
  511. helm/clients/ibm_client.py +267 -0
  512. helm/clients/image_generation/adobe_vision_client.py +1 -1
  513. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  514. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  515. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  516. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  517. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  518. helm/clients/image_generation/cogview2_client.py +1 -1
  519. helm/clients/image_generation/dalle2_client.py +1 -1
  520. helm/clients/image_generation/dalle3_client.py +2 -2
  521. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  522. helm/clients/image_generation/dalle_mini/data.py +1 -1
  523. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  524. helm/clients/image_generation/dalle_mini/model/configuration.py +2 -2
  525. helm/clients/image_generation/dalle_mini/model/modeling.py +3 -3
  526. helm/clients/image_generation/dalle_mini/model/processor.py +5 -5
  527. helm/clients/image_generation/dalle_mini/model/tokenizer.py +2 -2
  528. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  529. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  530. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  531. helm/clients/image_generation/dalle_mini_client.py +1 -1
  532. helm/clients/image_generation/deep_floyd_client.py +1 -1
  533. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  534. helm/clients/image_generation/lexica_client.py +1 -1
  535. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  536. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  537. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  538. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  539. helm/clients/image_generation/mindalle_client.py +1 -1
  540. helm/clients/image_generation/together_image_generation_client.py +1 -1
  541. helm/clients/lit_gpt_client.py +2 -2
  542. helm/clients/mistral_client.py +62 -18
  543. helm/clients/nvidia_nim_client.py +0 -3
  544. helm/clients/openai_client.py +308 -43
  545. helm/clients/openai_responses_client.py +174 -0
  546. helm/clients/palmyra_client.py +3 -9
  547. helm/clients/reka_client.py +3 -3
  548. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  549. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  550. helm/clients/stanfordhealthcare_google_client.py +43 -0
  551. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  552. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  553. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  554. helm/clients/test_client.py +1 -1
  555. helm/clients/test_together_client.py +6 -1
  556. helm/clients/together_client.py +76 -9
  557. helm/clients/upstage_client.py +23 -0
  558. helm/clients/vertexai_client.py +45 -13
  559. helm/clients/vision_language/huggingface_vision2seq_client.py +6 -4
  560. helm/clients/vision_language/huggingface_vlm_client.py +2 -2
  561. helm/clients/vision_language/idefics_client.py +6 -2
  562. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  563. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  564. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  565. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  566. helm/clients/vision_language/paligemma_client.py +2 -2
  567. helm/clients/vision_language/qwen2_vlm_client.py +188 -0
  568. helm/clients/vision_language/qwen_vlm_client.py +7 -5
  569. helm/clients/vllm_client.py +4 -6
  570. helm/clients/writer_client.py +102 -0
  571. helm/clients/yi_client.py +0 -3
  572. helm/common/audio_utils.py +111 -0
  573. helm/common/context.py +80 -0
  574. helm/common/credentials_utils.py +5 -5
  575. helm/common/file_caches/local_file_cache.py +1 -1
  576. helm/common/file_caches/test_local_file_cache.py +1 -1
  577. helm/common/general.py +9 -2
  578. helm/common/hierarchical_logger.py +46 -3
  579. helm/common/images_utils.py +2 -2
  580. helm/common/local_context.py +140 -0
  581. helm/common/media_object.py +2 -2
  582. helm/common/multimodal_request_utils.py +26 -0
  583. helm/common/reeval_parameters.py +12 -0
  584. helm/common/remote_context.py +61 -0
  585. helm/common/request.py +14 -2
  586. helm/common/response_format.py +18 -0
  587. helm/common/test_media_object.py +1 -1
  588. helm/config/model_deployments.yaml +1792 -28
  589. helm/config/model_metadata.yaml +1606 -51
  590. helm/config/tokenizer_configs.yaml +521 -4
  591. helm/proxy/cli.py +5 -3
  592. helm/proxy/critique/mechanical_turk_utils.py +1 -1
  593. helm/proxy/example_queries.py +1 -1
  594. helm/proxy/server.py +11 -4
  595. helm/proxy/services/remote_service.py +1 -1
  596. helm/proxy/services/server_service.py +22 -86
  597. helm/proxy/services/test_remote_service.py +2 -2
  598. helm/proxy/services/test_service.py +1 -1
  599. helm/proxy/static/general.js +122 -0
  600. helm/proxy/static/help.html +99 -0
  601. helm/proxy/static/index.css +57 -0
  602. helm/proxy/static/index.html +40 -0
  603. helm/proxy/static/index.js +456 -0
  604. helm/proxy/static/info-icon.png +0 -0
  605. helm/proxy/test_retry.py +1 -1
  606. helm/proxy/token_counters/auto_token_counter.py +1 -1
  607. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  608. helm/tokenizers/caching_tokenizer.py +2 -30
  609. helm/tokenizers/grok_tokenizer.py +53 -0
  610. helm/tokenizers/http_model_tokenizer.py +1 -1
  611. helm/tokenizers/huggingface_tokenizer.py +3 -3
  612. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  613. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  614. helm/tokenizers/test_grok_tokenizer.py +33 -0
  615. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  616. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  617. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  618. helm/tokenizers/tokenizer.py +3 -1
  619. helm/tokenizers/yalm_tokenizer.py +3 -3
  620. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  621. crfm_helm-0.5.4.dist-info/METADATA +0 -350
  622. crfm_helm-0.5.4.dist-info/RECORD +0 -697
  623. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  624. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  625. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  626. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  627. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  628. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  629. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  630. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  631. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  632. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  633. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  634. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  635. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  636. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  637. helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
  638. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  639. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  640. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  641. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  642. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  643. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  644. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  645. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  646. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  647. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  648. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  649. helm/tokenizers/anthropic_tokenizer.py +0 -52
  650. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/entry_points.txt +0 -0
  651. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info/licenses}/LICENSE +0 -0
  652. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.6.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ models:
18
18
  access: open
19
19
  release_date: 2023-01-01
20
20
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
21
-
21
+
22
22
  # Adobe
23
23
  - name: adobe/giga-gan
24
24
  display_name: GigaGAN (1B)
@@ -128,7 +128,7 @@ models:
128
128
 
129
129
  # AI Singapore
130
130
  - name: aisingapore/sea-lion-7b
131
- display_name: SEA-LION (7B)
131
+ display_name: SEA-LION 7B
132
132
  description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
133
133
  creator_organization_name: AI Singapore
134
134
  access: open
@@ -137,7 +137,7 @@ models:
137
137
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
138
138
 
139
139
  - name: aisingapore/sea-lion-7b-instruct
140
- display_name: SEA-LION Instruct (7B)
140
+ display_name: SEA-LION 7B Instruct
141
141
  description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
142
142
  creator_organization_name: AI Singapore
143
143
  access: open
@@ -146,23 +146,77 @@ models:
146
146
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
147
147
 
148
148
  - name: aisingapore/llama3-8b-cpt-sea-lionv2-base
149
- display_name: Llama 3 CPT SEA-Lion v2 (8B)
150
- description: Llama 3 CPT SEA-Lion v2 (8B) is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
149
+ display_name: Llama3 8B CPT SEA-LIONv2
150
+ description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
151
151
  creator_organization_name: AI Singapore
152
152
  access: open
153
- num_parameters: 80300000000
153
+ num_parameters: 8030000000
154
154
  release_date: 2024-07-31
155
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
155
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
156
156
 
157
157
  - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
158
- display_name: Llama 3 CPT SEA-Lion v2.1 Instruct (8B)
159
- description: Llama 3 CPT SEA-Lion v2.1 Instruct (8B) is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
158
+ display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct
159
+ description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
160
160
  creator_organization_name: AI Singapore
161
161
  access: open
162
- num_parameters: 80300000000
162
+ num_parameters: 8030000000
163
163
  release_date: 2024-08-21
164
164
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
165
165
 
166
+ - name: aisingapore/gemma2-9b-cpt-sea-lionv3-base
167
+ display_name: Gemma2 9B CPT SEA-LIONv3
168
+ description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese.
169
+ creator_organization_name: AI Singapore
170
+ access: open
171
+ num_parameters: 9240000000
172
+ release_date: 2024-10-30
173
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
174
+
175
+ - name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct
176
+ display_name: Gemma2 9B CPT SEA-LIONv3 Instruct
177
+ description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese.
178
+ creator_organization_name: AI Singapore
179
+ access: open
180
+ num_parameters: 9240000000
181
+ release_date: 2024-10-30
182
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
183
+
184
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
185
+ display_name: Llama3.1 8B CPT SEA-LIONv3
186
+ description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
187
+ creator_organization_name: AI Singapore
188
+ access: open
189
+ num_parameters: 8030000000
190
+ release_date: 2024-12-11
191
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
192
+
193
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
194
+ display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct
195
+ description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese.
196
+ creator_organization_name: AI Singapore
197
+ access: open
198
+ num_parameters: 8030000000
199
+ release_date: 2024-12-11
200
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
201
+
202
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
203
+ display_name: Llama3.1 70B CPT SEA-LIONv3
204
+ description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesian, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
205
+ creator_organization_name: AI Singapore
206
+ access: open
207
+ num_parameters: 70600000000
208
+ release_date: 2024-12-11
209
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
210
+
211
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
212
+ display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct
213
+ description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese.
214
+ creator_organization_name: AI Singapore
215
+ access: open
216
+ num_parameters: 70600000000
217
+ release_date: 2024-12-11
218
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
219
+
166
220
  # Aleph Alpha
167
221
  # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
168
222
  # TODO: add Luminous World when it's released
@@ -219,7 +273,42 @@ models:
219
273
  tags: [TEXT_TO_IMAGE_MODEL_TAG]
220
274
 
221
275
 
222
- # Amazon
276
+ # Amazon Nova models
277
+ # References for Amazon Nova models:
278
+ # https://aws.amazon.com/ai/generative-ai/nova/
279
+ - name: amazon/nova-premier-v1:0
280
+ display_name: Amazon Nova Premier
281
+ description: Amazon Nova Premier is the most capable model in the Nova family of foundation models. ([blog](https://aws.amazon.com/blogs/aws/amazon-nova-premier-our-most-capable-model-for-complex-tasks-and-teacher-for-model-distillation/))
282
+ creator_organization_name: Amazon
283
+ access: limited
284
+ release_date: 2025-04-30
285
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
286
+
287
+ - name: amazon/nova-pro-v1:0
288
+ display_name: Amazon Nova Pro
289
+ description: Amazon Nova Pro Model
290
+ creator_organization_name: Amazon
291
+ access: limited
292
+ release_date: 2024-12-03
293
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
294
+
295
+ - name: amazon/nova-lite-v1:0
296
+ display_name: Amazon Nova Lite
297
+ description: Amazon Nova Lite Model
298
+ creator_organization_name: Amazon
299
+ access: limited
300
+ release_date: 2024-12-03
301
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
302
+
303
+ - name: amazon/nova-micro-v1:0
304
+ display_name: Amazon Nova Micro
305
+ description: Amazon Nova Micro Model
306
+ creator_organization_name: Amazon
307
+ access: limited
308
+ release_date: 2024-12-03
309
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
310
+
311
+ # Titan Models
223
312
  # References for Amazon Titan models:
224
313
  # - https://aws.amazon.com/bedrock/titan/
225
314
  # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
@@ -230,16 +319,8 @@ models:
230
319
  creator_organization_name: Amazon
231
320
  access: limited
232
321
  release_date: 2023-11-29
233
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
234
-
235
- - name: amazon/titan-tg1-large
236
- display_name: Amazon Titan Large
237
- description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
238
- creator_organization_name: Amazon
239
- access: limited
240
- release_date: 2023-11-29
241
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
242
-
322
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
323
+
243
324
  - name: amazon/titan-text-express-v1
244
325
  display_name: Amazon Titan Text Express
245
326
  description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
@@ -248,6 +329,93 @@ models:
248
329
  release_date: 2023-11-29
249
330
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
250
331
 
332
+ # Mistral Models on Bedrock
333
+ # References for Mistral on Amazon Bedrock
334
+ # https://aws.amazon.com/bedrock/mistral/
335
+
336
+ - name: mistralai/amazon-mistral-7b-instruct-v0:2
337
+ display_name: Mistral 7B Instruct on Amazon Bedrock
338
+ description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
339
+ creator_organization_name: Mistral
340
+ access: limited
341
+ release_date: 2024-03-23
342
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
343
+
344
+ - name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
345
+ display_name: Mixtral 8x7B Instruct on Amazon Bedrock
346
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
347
+ creator_organization_name: Mistral
348
+ access: limited
349
+ release_date: 2023-12-11
350
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
351
+
352
+ - name: mistralai/amazon-mistral-large-2402-v1:0
353
+ display_name: Mistral Large (2402) on Amazon Bedrock
354
+ description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
355
+ creator_organization_name: Mistral
356
+ access: limited
357
+ release_date: 2024-02-26
358
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
359
+
360
+ - name: mistralai/amazon-mistral-small-2402-v1:0
361
+ display_name: Mistral Small on Amazon Bedrock
362
+ description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
363
+ creator_organization_name: Mistral
364
+ access: limited
365
+ release_date: 2024-02-26
366
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
367
+
368
+ - name: mistralai/amazon-mistral-large-2407-v1:0
369
+ display_name: Mistral Large (2407) on Amazon Bedrock
370
+ description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling JSON outputting and reasoning capabilities.
371
+ creator_organization_name: Mistral
372
+ access: limited
373
+ release_date: 2024-07-24
374
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
375
+
376
+ # Llama3 on Amazon Bedrock
377
+ # References for Llama3 on Amazon Bedrock
378
+ # https://aws.amazon.com/bedrock/llama/
379
+
380
+ - name: meta/amazon-llama3-8b-instruct-v1:0
381
+ display_name: Llama 3 8B Instruct on Amazon Bedrock
382
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
383
+ creator_organization_name: Meta
384
+ access: limited
385
+ release_date: 2024-04-23
386
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
387
+
388
+ - name: meta/amazon-llama3-70b-instruct-v1:0
389
+ display_name: Llama 3 70B Instruct on Amazon Bedrock
390
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.
391
+ creator_organization_name: Meta
392
+ access: limited
393
+ release_date: 2024-04-23
394
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
395
+
396
+ - name: meta/amazon-llama3-1-405b-instruct-v1:0
397
+ display_name: Llama 3.1 405B Instruct on Amazon Bedrock
398
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
399
+ creator_organization_name: Meta
400
+ access: limited
401
+ release_date: 2024-07-26
402
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
403
+
404
+ - name: meta/amazon-llama3-1-70b-instruct-v1:0
405
+ display_name: Llama 3.1 70B Instruct on Amazon Bedrock
406
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
407
+ creator_organization_name: Meta
408
+ access: limited
409
+ release_date: 2024-07-26
410
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
411
+
412
+ - name: meta/amazon-llama3-1-8b-instruct-v1:0
413
+ display_name: Llama 3.1 8B Instruct on Amazon Bedrock
414
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
415
+ creator_organization_name: Meta
416
+ access: limited
417
+ release_date: 2024-07-26
418
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
251
419
 
252
420
  # Anthropic
253
421
  - name: anthropic/claude-v1.3
@@ -315,6 +483,14 @@ models:
315
483
  release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
316
484
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
317
485
 
486
+ - name: anthropic/claude-3-5-haiku-20241022
487
+ display_name: Claude 3.5 Haiku (20241022)
488
+ description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
489
+ creator_organization_name: Anthropic
490
+ access: limited
491
+ release_date: 2024-11-04 # Released after the blog post
492
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
493
+
318
494
  - name: anthropic/claude-3-5-sonnet-20240620
319
495
  display_name: Claude 3.5 Sonnet (20240620)
320
496
  description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
@@ -323,6 +499,62 @@ models:
323
499
  release_date: 2024-06-20
324
500
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
325
501
 
502
+ - name: anthropic/claude-3-5-sonnet-20241022
503
+ display_name: Claude 3.5 Sonnet (20241022)
504
+ description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
505
+ creator_organization_name: Anthropic
506
+ access: limited
507
+ release_date: 2024-10-22
508
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
509
+
510
+ - name: anthropic/claude-3-7-sonnet-20250219
511
+ display_name: Claude 3.7 Sonnet (20250219)
512
+ description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)).
513
+ creator_organization_name: Anthropic
514
+ access: limited
515
+ release_date: 2025-02-24
516
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
517
+
518
+ - name: anthropic/claude-3-7-sonnet-20250219-thinking-10k
519
+ display_name: Claude 3.7 Sonnet (20250219, extended thinking)
520
+ description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)). Extended thinking is enabled with 10k budget tokens.
521
+ creator_organization_name: Anthropic
522
+ access: limited
523
+ release_date: 2025-02-24
524
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
525
+
526
+ - name: anthropic/claude-sonnet-4-20250514
527
+ display_name: Claude 4 Sonnet (20250514)
528
+ description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
529
+ creator_organization_name: Anthropic
530
+ access: limited
531
+ release_date: 2025-05-14
532
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
533
+
534
+ - name: anthropic/claude-sonnet-4-20250514-thinking-10k
535
+ display_name: Claude 4 Sonnet (20250514, extended thinking)
536
+ description: Claude 4 Sonnet is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
537
+ creator_organization_name: Anthropic
538
+ access: limited
539
+ release_date: 2025-05-14
540
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
541
+
542
+ - name: anthropic/claude-opus-4-20250514
543
+ display_name: Claude 4 Opus (20250514)
544
+ description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)).
545
+ creator_organization_name: Anthropic
546
+ access: limited
547
+ release_date: 2025-05-14
548
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
549
+
550
+ - name: anthropic/claude-opus-4-20250514-thinking-10k
551
+ display_name: Claude 4 Opus (20250514, extended thinking)
552
+ description: Claude 4 Opus is a hybrid model offering two modes - near-instant responses and extended thinking for deeper reasoning ([blog](https://www.anthropic.com/news/claude-4)). Extended thinking is enabled with 10k budget tokens.
553
+ creator_organization_name: Anthropic
554
+ access: limited
555
+ release_date: 2025-05-14
556
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
557
+
326
558
  - name: anthropic/stanford-online-all-v4-s3
327
559
  display_name: Anthropic-LM v4-s3 (52B)
328
560
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -342,7 +574,7 @@ models:
342
574
  access: open
343
575
  num_parameters: 13000000000
344
576
  release_date: 2022-04-03
345
- tags: [] # TODO: add tags
577
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
346
578
 
347
579
 
348
580
 
@@ -363,7 +595,7 @@ models:
363
595
  access: open
364
596
  num_parameters: 176000000000
365
597
  release_date: 2022-11-03
366
- tags: [] # TODO: add tags
598
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
367
599
 
368
600
  - name: bigscience/t0pp
369
601
  display_name: T0pp (11B)
@@ -418,7 +650,7 @@ models:
418
650
  access: limited
419
651
  num_parameters: 6700000000
420
652
  release_date: 2023-04-06
421
- tags: [] # TODO: add tags
653
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
422
654
 
423
655
  - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
424
656
  display_name: Cerebras GPT (13B)
@@ -427,7 +659,7 @@ models:
427
659
  access: limited
428
660
  num_parameters: 13000000000
429
661
  release_date: 2023-04-06
430
- tags: [] # TODO: add tags
662
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
431
663
 
432
664
 
433
665
 
@@ -644,7 +876,7 @@ models:
644
876
  access: closed
645
877
  num_parameters: 280000000000
646
878
  release_date: 2021-12-08
647
- tags: [] # TODO: add tags
879
+ tags: [UNSUPPORTED_MODEL_TAG]
648
880
 
649
881
  - name: deepmind/chinchilla # NOT SUPPORTED
650
882
  display_name: Chinchilla (70B)
@@ -653,7 +885,7 @@ models:
653
885
  access: closed
654
886
  num_parameters: 70000000000
655
887
  release_date: 2022-03-31
656
- tags: [] # TODO: add tags
888
+ tags: [UNSUPPORTED_MODEL_TAG]
657
889
 
658
890
 
659
891
  # Deepseek
@@ -666,6 +898,62 @@ models:
666
898
  release_date: 2024-01-05
667
899
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
668
900
 
901
+ - name: deepseek-ai/deepseek-v3
902
+ display_name: DeepSeek v3
903
+ description: DeepSeek v3 is a Mixture-of-Experts (MoE) language model with 671B total parameters, of which 37B are activated for each token. It adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
904
+ creator_organization_name: DeepSeek
905
+ access: open
906
+ # NOTE: The total size of DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights.
907
+ num_parameters: 685000000000
908
+ release_date: 2024-12-24
909
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
910
+
911
+ - name: deepseek-ai/deepseek-r1
912
+ display_name: DeepSeek R1
913
+ description: DeepSeek R1 is DeepSeek's first-generation reasoning model, which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948))
914
+ creator_organization_name: DeepSeek
915
+ access: open
916
+ # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
917
+ num_parameters: 685000000000
918
+ release_date: 2025-01-20
919
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
920
+
921
+ - name: deepseek-ai/deepseek-r1-hide-reasoning
922
+ display_name: DeepSeek R1 (hide reasoning)
923
+ description: DeepSeek R1 is DeepSeek's first-generation reasoning model, which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model.
924
+ creator_organization_name: DeepSeek
925
+ access: open
926
+ # NOTE: The total size of the DeepSeek-R1 model on HuggingFace is 685B
927
+ num_parameters: 685000000000
928
+ release_date: 2025-01-20
929
+ tags: [DEPRECATED_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
930
+
931
+ - name: deepseek-ai/deepseek-r1-0528
932
+ display_name: DeepSeek-R1-0528
933
+ description: DeepSeek-R1-0528 is a minor version upgrade from DeepSeek R1 that has improved its depth of reasoning and inference capabilities by leveraging increased computational resources and introducing algorithmic optimization mechanisms during post-training. ([paper](https://arxiv.org/abs/2501.12948))
934
+ creator_organization_name: DeepSeek
935
+ access: open
936
+ num_parameters: 685000000000
937
+ release_date: 2025-05-28
938
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
939
+
940
+ - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
941
+ display_name: DeepSeek-R1-Distill-Llama-8b
942
+ description: DeepSeek-R1-Distill-Llama-8B is an 8B model distilled from DeepSeek-R1 reasoning outputs using a Llama 8B base model. ([paper](https://arxiv.org/abs/2501.12948))
943
+ creator_organization_name: DeepSeek
944
+ access: open
945
+ num_parameters: 8000000000
946
+ release_date: 2025-01-20
947
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
948
+
949
+ - name: deepseek-ai/deepseek-coder-6.7b-instruct
950
+ display_name: DeepSeek-Coder-6.7b-Instruct
951
+ description: DeepSeek-Coder-6.7b-Instruct is a 6.7B-parameter instruction-tuned code model from DeepSeek.
952
+ creator_organization_name: DeepSeek
953
+ access: open
954
+ num_parameters: 6740000000
955
+ release_date: 2025-01-20
956
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
669
957
 
670
958
  # EleutherAI
671
959
  - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
@@ -769,7 +1057,7 @@ models:
769
1057
  access: closed
770
1058
  num_parameters: 540000000000
771
1059
  release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
772
- tags: [] # TODO: add tags
1060
+ tags: [UNSUPPORTED_MODEL_TAG]
773
1061
 
774
1062
  # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
775
1063
  - name: google/gemini-pro
@@ -819,7 +1107,7 @@ models:
819
1107
  creator_organization_name: Google
820
1108
  access: limited
821
1109
  release_date: 2024-05-24
822
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1110
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
823
1111
 
824
1112
  - name: google/gemini-1.5-flash-001
825
1113
  display_name: Gemini 1.5 Flash (001)
@@ -827,7 +1115,7 @@ models:
827
1115
  creator_organization_name: Google
828
1116
  access: limited
829
1117
  release_date: 2024-05-24
830
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1118
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
831
1119
 
832
1120
  - name: google/gemini-1.5-pro-preview-0409
833
1121
  display_name: Gemini 1.5 Pro (0409 preview)
@@ -885,6 +1173,142 @@ models:
885
1173
  release_date: 2024-05-24
886
1174
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
887
1175
 
1176
+ - name: google/gemini-1.5-pro-002
1177
+ display_name: Gemini 1.5 Pro (002)
1178
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
1179
+ creator_organization_name: Google
1180
+ access: limited
1181
+ release_date: 2024-09-24
1182
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1183
+
1184
+ - name: google/gemini-1.5-flash-002
1185
+ display_name: Gemini 1.5 Flash (002)
1186
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
1187
+ creator_organization_name: Google
1188
+ access: limited
1189
+ release_date: 2024-09-24
1190
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1191
+
1192
+ - name: google/gemini-2.0-flash-exp
1193
+ display_name: Gemini 2.0 Flash (Experimental)
1194
+ description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash))
1195
+ creator_organization_name: Google
1196
+ access: limited
1197
+ release_date: 2024-12-11
1198
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1199
+
1200
+ - name: google/gemini-1.5-flash-8b-001
1201
+ display_name: Gemini 1.5 Flash 8B
1202
+ description: Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks. ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1203
+ creator_organization_name: Google
1204
+ access: limited
1205
+ release_date: 2024-10-01
1206
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1207
+
1208
+ - name: google/gemini-2.0-flash-001
1209
+ display_name: Gemini 2.0 Flash
1210
+ description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1211
+ creator_organization_name: Google
1212
+ access: limited
1213
+ release_date: 2025-02-01
1214
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1215
+
1216
+ - name: google/gemini-2.0-flash-lite-preview-02-05
1217
+ display_name: Gemini 2.0 Flash Lite (02-05 preview)
1218
+ description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1219
+ creator_organization_name: Google
1220
+ access: limited
1221
+ release_date: 2025-02-05
1222
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1223
+
1224
+ - name: google/gemini-2.0-flash-lite-001
1225
+ display_name: Gemini 2.0 Flash Lite
1226
+ description: Gemini 2.0 Flash Lite ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1227
+ creator_organization_name: Google
1228
+ access: limited
1229
+ release_date: 2025-03-25
1230
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1231
+
1232
+ - name: google/gemini-2.0-flash-thinking-exp-01-21
1233
+ display_name: Gemini 2.0 Flash Thinking (01-21 preview)
1234
+ description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
1235
+ creator_organization_name: Google
1236
+ access: limited
1237
+ release_date: 2025-01-21
1238
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1239
+
1240
+ - name: google/gemini-2.0-pro-exp-02-05
1241
+ display_name: Gemini 2.0 Pro (02-05 preview)
1242
+ description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1243
+ creator_organization_name: Google
1244
+ access: limited
1245
+ release_date: 2025-02-05
1246
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1247
+
1248
+ - name: google/gemini-2.5-flash-lite-preview-06-17
1249
+ display_name: Gemini 2.5 Flash-Lite (06-17 preview)
1250
+ description: Gemini 2.5 Flash-Lite (06-17 preview) ([blog](https://blog.google/products/gemini/gemini-2-5-model-family-expands/))
1251
+ creator_organization_name: Google
1252
+ access: limited
1253
+ release_date: 2025-06-17
1254
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1255
+
1256
+ - name: google/gemini-2.5-flash-preview-04-17
1257
+ display_name: Gemini 2.5 Flash (04-17 preview)
1258
+ description: Gemini 2.5 Flash (04-17 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1259
+ creator_organization_name: Google
1260
+ access: limited
1261
+ release_date: 2025-04-17
1262
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1263
+
1264
+ - name: google/gemini-2.5-flash-preview-05-20
1265
+ display_name: Gemini 2.5 Flash (05-20 preview)
1266
+ description: Gemini 2.5 Flash (05-20 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1267
+ creator_organization_name: Google
1268
+ access: limited
1269
+ release_date: 2025-04-17
1270
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1271
+
1272
+ - name: google/gemini-2.5-flash
1273
+ display_name: Gemini 2.5 Flash
1274
+ description: Gemini 2.5 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1275
+ creator_organization_name: Google
1276
+ access: limited
1277
+ release_date: 2025-06-17
1278
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1279
+
1280
+ - name: google/gemini-2.5-pro-exp-03-25
1281
+ display_name: Gemini 2.5 Pro (03-25 experimental)
1282
+ description: Gemini 2.5 Pro (03-25 experimental) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1283
+ creator_organization_name: Google
1284
+ access: limited
1285
+ release_date: 2025-03-25
1286
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1287
+
1288
+ - name: google/gemini-2.5-pro-preview-03-25
1289
+ display_name: Gemini 2.5 Pro (03-25 preview)
1290
+ description: Gemini 2.5 Pro (03-25 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1291
+ creator_organization_name: Google
1292
+ access: limited
1293
+ release_date: 2025-04-09 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
1294
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1295
+
1296
+ - name: google/gemini-2.5-pro-preview-05-06
1297
+ display_name: Gemini 2.5 Pro (05-06 preview)
1298
+ description: Gemini 2.5 Pro (05-06 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1299
+ creator_organization_name: Google
1300
+ access: limited
1301
+ release_date: 2025-05-06 # source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro
1302
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1303
+
1304
+ - name: google/gemini-2.5-pro
1305
+ display_name: Gemini 2.5 Pro
1306
+ description: Gemini 2.5 Pro ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1307
+ creator_organization_name: Google
1308
+ access: limited
1309
+ release_date: 2025-06-17
1310
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1311
+
888
1312
  - name: google/gemma-2b
889
1313
  display_name: Gemma (2B)
890
1314
  description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
@@ -1083,6 +1507,60 @@ models:
1083
1507
  release_date: 2023-08-22
1084
1508
  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, IDEFICS_INSTRUCT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
1085
1509
 
1510
+ - name: huggingface/smollm2-135m
1511
+ display_name: SmolLM2 (135M)
1512
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1513
+ creator_organization_name: HuggingFace
1514
+ access: open
1515
+ num_parameters: 135000000
1516
+ release_date: 2024-10-31
1517
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1518
+
1519
+ - name: huggingface/smollm2-360m
1520
+ display_name: SmolLM2 (360M)
1521
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1522
+ creator_organization_name: HuggingFace
1523
+ access: open
1524
+ num_parameters: 362000000
1525
+ release_date: 2024-10-31
1526
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1527
+
1528
+ - name: huggingface/smollm2-1.7b
1529
+ display_name: SmolLM2 (1.7B)
1530
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1531
+ creator_organization_name: HuggingFace
1532
+ access: open
1533
+ num_parameters: 1710000000
1534
+ release_date: 2024-10-31
1535
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
1536
+
1537
+ - name: huggingface/smollm2-135m-instruct
1538
+ display_name: SmolLM2 Instruct (135M)
1539
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1540
+ creator_organization_name: HuggingFace
1541
+ access: open
1542
+ num_parameters: 135000000
1543
+ release_date: 2024-10-31
1544
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1545
+
1546
+ - name: huggingface/smollm2-360m-instruct
1547
+ display_name: SmolLM2 Instruct (360M)
1548
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1549
+ creator_organization_name: HuggingFace
1550
+ access: open
1551
+ num_parameters: 362000000
1552
+ release_date: 2024-10-31
1553
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1554
+
1555
+ - name: huggingface/smollm2-1.7b-instruct
1556
+ display_name: SmolLM2 Instruct (1.7B)
1557
+ description: SmolLM2 is a family of compact language models that are capable of solving a wide range of tasks while being lightweight enough to run on-device. ([paper](https://arxiv.org/abs/2502.02737v1))
1558
+ creator_organization_name: HuggingFace
1559
+ access: open
1560
+ num_parameters: 1710000000
1561
+ release_date: 2024-10-31
1562
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1563
+
1086
1564
  ## Text-to-Image Diffusion Models
1087
1565
  - name: huggingface/dreamlike-diffusion-v1-0
1088
1566
  display_name: Dreamlike Diffusion v1.0 (1B)
@@ -1296,6 +1774,16 @@ models:
1296
1774
  release_date: 2023-06-22
1297
1775
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1298
1776
 
1777
+ # Marin Community
1778
+ - name: marin-community/marin-8b-instruct
1779
+ display_name: Marin 8B Instruct
1780
+ description: Marin 8B Instruct is an open-source 8B-parameter instruction-tuned language model released by the Marin Community.
1781
+ creator_organization_name: Marin Community
1782
+ access: open
1783
+ num_parameters: 8030000000
1784
+ release_date: 2025-05-15
1785
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1786
+
1299
1787
  # Meta
1300
1788
  - name: meta/opt-iml-175b # NOT SUPPORTED
1301
1789
  display_name: OPT-IML (175B)
@@ -1304,7 +1792,7 @@ models:
1304
1792
  access: open
1305
1793
  num_parameters: 175000000000
1306
1794
  release_date: 2022-12-22
1307
- tags: [] # TODO: add tags
1795
+ tags: [UNSUPPORTED_MODEL_TAG]
1308
1796
 
1309
1797
  - name: meta/opt-iml-30b # NOT SUPPORTED
1310
1798
  display_name: OPT-IML (30B)
@@ -1313,7 +1801,7 @@ models:
1313
1801
  access: open
1314
1802
  num_parameters: 30000000000
1315
1803
  release_date: 2022-12-22
1316
- tags: [] # TODO: add tags
1804
+ tags: [UNSUPPORTED_MODEL_TAG]
1317
1805
 
1318
1806
  - name: meta/opt-175b
1319
1807
  display_name: OPT (175B)
@@ -1360,7 +1848,7 @@ models:
1360
1848
  access: open
1361
1849
  num_parameters: 120000000000
1362
1850
  release_date: 2022-11-15
1363
- tags: [] # TODO: add tags
1851
+ tags: [UNSUPPORTED_MODEL_TAG]
1364
1852
 
1365
1853
  - name: meta/galactica-30b # NOT SUPPORTED
1366
1854
  display_name: Galactica (30B)
@@ -1369,7 +1857,7 @@ models:
1369
1857
  access: open
1370
1858
  num_parameters: 30000000000
1371
1859
  release_date: 2022-11-15
1372
- tags: [] # TODO: add tags
1860
+ tags: [UNSUPPORTED_MODEL_TAG]
1373
1861
 
1374
1862
  - name: meta/llama-7b
1375
1863
  display_name: LLaMA (7B)
@@ -1490,6 +1978,33 @@ models:
1490
1978
  release_date: 2024-07-18
1491
1979
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1492
1980
 
1981
+ - name: meta/llama-3.1-8b-instruct
1982
+ display_name: Llama 3.1 Instruct (8B)
1983
+ description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1984
+ creator_organization_name: Meta
1985
+ access: open
1986
+ num_parameters: 8000000000
1987
+ release_date: 2024-07-23
1988
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1989
+
1990
+ - name: meta/llama-3.1-70b-instruct
1991
+ display_name: Llama 3.1 Instruct (70B)
1992
+ description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1993
+ creator_organization_name: Meta
1994
+ access: open
1995
+ num_parameters: 70000000000
1996
+ release_date: 2024-07-23
1997
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1998
+
1999
+ - name: meta/llama-3.1-405b-instruct
2000
+ display_name: Llama 3.1 Instruct (405B)
2001
+ description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
2002
+ creator_organization_name: Meta
2003
+ access: open
2004
+ num_parameters: 405000000000
2005
+ release_date: 2024-07-23
2006
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2007
+
1493
2008
  - name: meta/llama-3.1-8b-instruct-turbo
1494
2009
  display_name: Llama 3.1 Instruct Turbo (8B)
1495
2010
 description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1517,6 +2032,15 @@ models:
1517
2032
  release_date: 2024-07-23
1518
2033
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1519
2034
 
2035
+ - name: meta/llama-3.2-1b-instruct
2036
+ display_name: Llama 3.2 Instruct (1.23B)
2037
+ description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/))
2038
+ creator_organization_name: Meta
2039
+ access: open
2040
+ num_parameters: 1230000000
2041
+ release_date: 2024-09-25
2042
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2043
+
1520
2044
  - name: meta/llama-3.2-3b-instruct-turbo
1521
2045
  display_name: Llama 3.2 Instruct Turbo (3B)
1522
2046
  description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1533,7 +2057,7 @@ models:
1533
2057
  access: open
1534
2058
  num_parameters: 10700000000
1535
2059
  release_date: 2024-09-25
1536
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG. LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2060
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1537
2061
 
1538
2062
  - name: meta/llama-3.2-90b-vision-instruct-turbo
1539
2063
  display_name: Llama 3.2 Vision Instruct Turbo (90B)
@@ -1542,7 +2066,43 @@ models:
1542
2066
  access: open
1543
2067
  num_parameters: 88600000000
1544
2068
  release_date: 2024-09-25
1545
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG. LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2069
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2070
+
2071
+ - name: meta/llama-3.3-70b-instruct-turbo
2072
+ display_name: Llama 3.3 Instruct Turbo (70B)
2073
+ description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
2074
+ creator_organization_name: Meta
2075
+ access: open
2076
+ num_parameters: 70000000000
2077
+ release_date: 2024-12-06
2078
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2079
+
2080
+ - name: meta/llama-3.3-70b-instruct
2081
+ display_name: Llama 3.3 Instruct (70B)
2082
+ description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
2083
+ creator_organization_name: Meta
2084
+ access: open
2085
+ num_parameters: 70000000000
2086
+ release_date: 2024-12-06
2087
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2088
+
2089
+ - name: meta/llama-4-scout-17b-16e-instruct
2090
+ display_name: Llama 4 Scout (17Bx16E) Instruct
2091
+ description: Llama 4 Scout (17Bx16E) Instruct is part of the Llama 4 collection of models, which are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
2092
+ creator_organization_name: Meta
2093
+ access: open
2094
+ num_parameters: 109000000000
2095
+ release_date: 2025-04-05
2096
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2097
+
2098
+ - name: meta/llama-4-maverick-17b-128e-instruct-fp8
2099
+ display_name: Llama 4 Maverick (17Bx128E) Instruct FP8
2100
+ description: Llama 4 Maverick (17Bx128E) Instruct FP8 is part of the Llama 4 collection of models, which are natively multimodal AI models that enable text and multimodal experiences using a mixture-of-experts architecture. ([blog](https://ai.meta.com/blog/llama-4-multimodal-intelligence/))
2101
+ creator_organization_name: Meta
2102
+ access: open
2103
+ num_parameters: 402000000000
2104
+ release_date: 2025-04-05
2105
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1546
2106
 
1547
2107
  - name: meta/llama-3-8b-chat
1548
2108
  display_name: Llama 3 Instruct (8B)
@@ -1698,10 +2258,28 @@ models:
1698
2258
  num_parameters: 14000000000
1699
2259
  release_date: 2024-05-21
1700
2260
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1701
-
1702
- # KAIST AI
1703
- - name: kaistai/prometheus-vision-13b-v1.0-hf
1704
- display_name: LLaVA + Vicuna-v1.5 (13B)
2261
+
2262
+ - name: microsoft/phi-3.5-mini-instruct
2263
+ display_name: Phi-3.5-mini-instruct (3.8B)
2264
+ description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
2265
+ creator_organization_name: Microsoft
2266
+ access: open
2267
+ num_parameters: 3800000000
2268
+ release_date: 2024-08-22
2269
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2270
+
2271
+ - name: microsoft/phi-3.5-moe-instruct
2272
+ display_name: Phi-3.5 MoE
2273
+ description: Phi-3.5 MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents - with a focus on very high-quality, reasoning dense data. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
2274
+ creator_organization_name: Microsoft
2275
+ access: open
2276
+ num_parameters: 41900000000
2277
+ release_date: 2024-08-22
2278
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2279
+
2280
+ # KAIST AI
2281
+ - name: kaistai/prometheus-vision-13b-v1.0-hf
2282
+ display_name: LLaVA + Vicuna-v1.5 (13B)
1705
2283
  description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
1706
2284
  creator_organization_name: KAIST AI
1707
2285
  access: open
@@ -1801,6 +2379,42 @@ models:
1801
2379
  release_date: 2024-04-17
1802
2380
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1803
2381
 
2382
+ - name: allenai/olmo-2-1124-7b-instruct
2383
+ display_name: OLMo 2 7B Instruct November 2024
2384
+ description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
2385
+ creator_organization_name: Allen Institute for AI
2386
+ access: open
2387
+ num_parameters: 7300000000
2388
+ release_date: 2024-11-26
2389
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2390
+
2391
+ - name: allenai/olmo-2-1124-13b-instruct
2392
+ display_name: OLMo 2 13B Instruct November 2024
2393
+ description: OLMo 2 is a family of 7B and 13B models trained on up to 5T tokens. ([blog](https://allenai.org/blog/olmo2))
2394
+ creator_organization_name: Allen Institute for AI
2395
+ access: open
2396
+ num_parameters: 13700000000
2397
+ release_date: 2024-11-26
2398
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2399
+
2400
+ - name: allenai/olmo-2-0325-32b-instruct
2401
+ display_name: OLMo 2 32B Instruct March 2025
2402
+ description: OLMo 2 32B Instruct March 2025 is trained up to 6T tokens and post-trained using Tulu 3.1. ([blog](https://allenai.org/blog/olmo2-32B))
2403
+ creator_organization_name: Allen Institute for AI
2404
+ access: open
2405
+ num_parameters: 32200000000
2406
+ release_date: 2025-03-13
2407
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2408
+
2409
+ - name: allenai/olmoe-1b-7b-0125-instruct
2410
+ display_name: OLMoE 1B-7B Instruct January 2025
2411
+ description: OLMoE 1B-7B Instruct January 2025 is a fully open language model leveraging sparse Mixture-of-Experts (MoE). It has 7B parameters but uses only 1B per input token. It was pretrained on 5T tokens. ([blog](https://allenai.org/blog/olmoe-an-open-small-and-state-of-the-art-mixture-of-experts-model-c258432d0514), [paper](https://arxiv.org/abs/2409.02060))
2412
+ creator_organization_name: Allen Institute for AI
2413
+ access: open
2414
+ num_parameters: 32200000000
2415
+ release_date: 2025-03-13
2416
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2417
+
1804
2418
  # Mistral AI
1805
2419
  - name: mistralai/mistral-7b-v0.1
1806
2420
  display_name: Mistral v0.1 (7B)
@@ -1837,6 +2451,15 @@ models:
1837
2451
  num_parameters: 7300000000
1838
2452
  release_date: 2024-05-22
1839
2453
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2454
+
2455
+ - name: mistralai/mistral-7b-instruct-v0.3-hf
2456
+ display_name: Mistral Instruct v0.3 (7B)
2457
+ description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
2458
+ creator_organization_name: Mistral AI
2459
+ access: open
2460
+ num_parameters: 7300000000
2461
+ release_date: 2024-05-22
2462
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1840
2463
 
1841
2464
  - name: mistralai/mixtral-8x7b-32kseqlen
1842
2465
  display_name: Mixtral (8x7B 32K seqlen)
@@ -1884,6 +2507,22 @@ models:
1884
2507
  release_date: 2023-10-16
1885
2508
  tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1886
2509
 
2510
+ - name: mistralai/ministral-3b-2410
2511
+ display_name: Ministral 3B (2410)
2512
+ description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)).
2513
+ creator_organization_name: Mistral AI
2514
+ access: limited
2515
+ release_date: 2024-10-16
2516
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2517
+
2518
+ - name: mistralai/ministral-8b-2410
2519
+ display_name: Ministral 8B (2410)
2520
+ description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases with a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)).
2521
+ creator_organization_name: Mistral AI
2522
+ access: open
2523
+ release_date: 2024-10-16
2524
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2525
+
1887
2526
  - name: mistralai/mistral-small-2402
1888
2527
  display_name: Mistral Small (2402)
1889
2528
  description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -1892,6 +2531,32 @@ models:
1892
2531
  release_date: 2024-02-26
1893
2532
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1894
2533
 
2534
+ - name: mistralai/mistral-small-2409
2535
+ display_name: Mistral Small (2409)
2536
+ description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
2537
+ creator_organization_name: Mistral AI
2538
+ access: limited
2539
+ release_date: 2024-09-18
2540
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2541
+
2542
+ - name: mistralai/mistral-small-2501
2543
+ display_name: Mistral Small 3 (2501)
2544
+ description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks—those that require robust language and instruction following performance, with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/))
2545
+ creator_organization_name: Mistral AI
2546
+ access: open
2547
+ num_parameters: 23600000000
2548
+ release_date: 2025-01-30
2549
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2550
+
2551
+ - name: mistralai/mistral-small-2503
2552
+ display_name: Mistral Small 3.1 (2503)
2553
+ description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1))
2554
+ creator_organization_name: Mistral AI
2555
+ access: open
2556
+ num_parameters: 23600000000
2557
+ release_date: 2025-03-17
2558
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2559
+
1895
2560
  - name: mistralai/mistral-medium-2312
1896
2561
  display_name: Mistral Medium (2312)
1897
2562
  description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
@@ -1900,6 +2565,14 @@ models:
1900
2565
  release_date: 2023-12-11
1901
2566
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1902
2567
 
2568
+ - name: mistralai/mistral-medium-2505
2569
+ display_name: Mistral Medium 3 (2505)
2570
+ description: Mistral Medium 3 (2505) is a language model that is intended to deliver state-of-the-art performance at lower cost. ([blog](https://mistral.ai/news/mistral-medium-3))
2571
+ creator_organization_name: Mistral AI
2572
+ access: limited
2573
+ release_date: 2025-05-07
2574
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2575
+
1903
2576
  - name: mistralai/mistral-large-2402
1904
2577
  display_name: Mistral Large (2402)
1905
2578
  description: Mistral Large is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -1917,6 +2590,15 @@ models:
1917
2590
  release_date: 2023-07-24
1918
2591
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1919
2592
 
2593
+ - name: mistralai/mistral-large-2411
2594
+ display_name: Mistral Large (2411)
2595
+ description: Mistral Large (2411) is a 123B parameter model that has a 128k context window. ([blog](https://mistral.ai/news/pixtral-large/))
2596
+ creator_organization_name: Mistral AI
2597
+ access: open
2598
+ num_parameters: 123000000000
2599
+ release_date: 2024-11-18
2600
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2601
+
1920
2602
  - name: mistralai/open-mistral-nemo-2407
1921
2603
  display_name: Mistral NeMo (2407)
1922
2604
  description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
@@ -1925,6 +2607,24 @@ models:
1925
2607
  release_date: 2024-07-18
1926
2608
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1927
2609
 
2610
+ - name: mistralai/pixtral-12b-2409
2611
+ display_name: Mistral Pixtral (2409)
2612
+ description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/))
2613
+ creator_organization_name: Mistral AI
2614
+ access: open
2615
+ release_date: 2024-09-17
2616
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2617
+
2618
+ - name: mistralai/pixtral-large-2411
2619
+ display_name: Mistral Pixtral Large (2411)
2620
+ description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/))
2621
+ creator_organization_name: Mistral AI
2622
+ access: open
2623
+ num_parameters: 124000000000
2624
+ release_date: 2024-11-18
2625
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2626
+
2627
+
1928
2628
  # MosaicML
1929
2629
  - name: mosaicml/mpt-7b
1930
2630
  display_name: MPT (7B)
@@ -1942,7 +2642,7 @@ models:
1942
2642
  access: open
1943
2643
  num_parameters: 6700000000
1944
2644
  release_date: 2023-05-05
1945
- tags: [] # TODO: add tags
2645
+ tags: [UNSUPPORTED_MODEL_TAG]
1946
2646
 
1947
2647
  - name: mosaicml/mpt-instruct-7b
1948
2648
  display_name: MPT-Instruct (7B)
@@ -1969,7 +2669,7 @@ models:
1969
2669
  access: open
1970
2670
  num_parameters: 30000000000
1971
2671
  release_date: 2023-06-22
1972
- tags: [] # TODO: add tags
2672
+ tags: [UNSUPPORTED_MODEL_TAG]
1973
2673
 
1974
2674
  - name: mosaicml/mpt-instruct-30b
1975
2675
  display_name: MPT-Instruct (30B)
@@ -1981,6 +2681,27 @@ models:
1981
2681
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1982
2682
 
1983
2683
 
2684
+
2685
+ # NECTEC
2686
+ - name: nectec/Pathumma-llm-text-1.0.0
2687
+ display_name: Pathumma-llm-text-1.0.0 (7B)
2688
+ description: Pathumma-llm-text-1.0.0 (7B) is an instruction model from OpenThaiLLM-Prebuilt-7B ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
2689
+ creator_organization_name: nectec
2690
+ access: open
2691
+ num_parameters: 7620000000
2692
+ release_date: 2024-10-28
2693
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2694
+
2695
+ - name: nectec/OpenThaiLLM-Prebuilt-7B
2696
+ display_name: OpenThaiLLM-Prebuilt-7B (7B)
2697
+ description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
2698
+ creator_organization_name: nectec
2699
+ access: open
2700
+ num_parameters: 7620000000
2701
+ release_date: 2024-10-28
2702
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2703
+
2704
+
1984
2705
 
1985
2706
  # Neurips
1986
2707
  - name: neurips/local
@@ -2010,6 +2731,16 @@ models:
2010
2731
  release_date: 2024-06-17
2011
2732
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2012
2733
 
2734
+ - name: nvidia/llama-3.1-nemotron-70b-instruct
2735
+ display_name: Llama 3.1 Nemotron Instruct (70B)
2736
+ description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
2737
+ creator_organization_name: NVIDIA
2738
+ access: open
2739
+ num_parameters: 70000000000
2740
+ release_date: 2024-10-02
2741
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2742
+
2743
+
2013
2744
  # OpenAI
2014
2745
 
2015
2746
  ## GPT 2 Models
@@ -2194,7 +2925,7 @@ models:
2194
2925
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2195
2926
 
2196
2927
 
2197
- ## GPT 4 Models
2928
+ ## GPT-4 and GPT-4 Turbo
2198
2929
 
2199
2930
  - name: openai/gpt-4-1106-preview
2200
2931
  display_name: GPT-4 Turbo (1106 preview)
@@ -2246,6 +2977,8 @@ models:
2246
2977
  release_date: 2024-01-25
2247
2978
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2248
2979
 
2980
+ ## GPT-4o
2981
+
2249
2982
  - name: openai/gpt-4-turbo-2024-04-09
2250
2983
  display_name: GPT-4 Turbo (2024-04-09)
2251
2984
  description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
@@ -2270,6 +3003,14 @@ models:
2270
3003
  release_date: 2024-08-06
2271
3004
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2272
3005
 
3006
+ - name: openai/gpt-4o-2024-11-20
3007
+ display_name: GPT-4o (2024-11-20)
3008
+ description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
3009
+ creator_organization_name: OpenAI
3010
+ access: limited
3011
+ release_date: 2024-11-20
3012
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3013
+
2273
3014
  - name: openai/gpt-4o-mini-2024-07-18
2274
3015
  display_name: GPT-4o mini (2024-07-18)
2275
3016
  description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
@@ -2278,6 +3019,80 @@ models:
2278
3019
  release_date: 2024-07-18
2279
3020
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2280
3021
 
3022
+ - name: openai/gpt-4.1-2025-04-14
3023
+ display_name: GPT-4.1 (2025-04-14)
3024
+ description: GPT-4.1 (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
3025
+ creator_organization_name: OpenAI
3026
+ access: limited
3027
+ release_date: 2025-04-14
3028
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3029
+
3030
+ - name: openai/gpt-4.1-mini-2025-04-14
3031
+ display_name: GPT-4.1 mini (2025-04-14)
3032
+ description: GPT-4.1 mini (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
3033
+ creator_organization_name: OpenAI
3034
+ access: limited
3035
+ release_date: 2025-04-14
3036
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3037
+
3038
+ - name: openai/gpt-4.1-nano-2025-04-14
3039
+ display_name: GPT-4.1 nano (2025-04-14)
3040
+ description: GPT-4.1 nano (2025-04-14) is a multimodal model in the GPT-4.1 family, which outperforms the GPT-4o family, with major gains in coding and instruction following. They also have larger context windows of 1 million tokens and are able to better use that context with improved long-context comprehension. ([blog](https://openai.com/index/gpt-4-1/))
3041
+ creator_organization_name: OpenAI
3042
+ access: limited
3043
+ release_date: 2025-04-14
3044
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3045
+
3046
+ - name: openai/whisper-1_gpt-4o-2024-11-20
3047
+ display_name: Whisper-1 + GPT-4o (2024-11-20)
3048
+ description: Transcribes the text with Whisper-1 and then uses GPT-4o to generate a response.
3049
+ creator_organization_name: OpenAI
3050
+ access: limited
3051
+ release_date: 2024-11-20
3052
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
3053
+
3054
+ - name: openai/gpt-4o-transcribe_gpt-4o-2024-11-20
3055
+ display_name: GPT-4o Transcribe + GPT-4o (2024-11-20)
3056
+ description: Transcribes the text with GPT-4o Transcribe and then uses GPT-4o to generate a response.
3057
+ creator_organization_name: OpenAI
3058
+ access: limited
3059
+ release_date: 2025-03-20
3060
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
3061
+
3062
+ - name: openai/gpt-4o-mini-transcribe_gpt-4o-2024-11-20
3063
+ display_name: GPT-4o mini Transcribe + GPT-4o (2024-11-20)
3064
+ description: Transcribes the text with GPT-4o mini Transcribe and then uses GPT-4o to generate a response.
3065
+ creator_organization_name: OpenAI
3066
+ access: limited
3067
+ release_date: 2025-03-20
3068
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
3069
+
3070
+ - name: openai/gpt-4o-audio-preview-2024-10-01
3071
+ display_name: GPT-4o Audio (Preview 2024-10-01)
3072
+ description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
3073
+ creator_organization_name: OpenAI
3074
+ access: limited
3075
+ release_date: 2024-10-01
3076
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3077
+
3078
+ - name: openai/gpt-4o-audio-preview-2024-12-17
3079
+ display_name: GPT-4o Audio (Preview 2024-12-17)
3080
+ description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
3081
+ creator_organization_name: OpenAI
3082
+ access: limited
3083
+ release_date: 2024-12-17
3084
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3085
+
3086
+ - name: openai/gpt-4o-mini-audio-preview-2024-12-17
3087
+ display_name: GPT-4o mini Audio (Preview 2024-12-17)
3088
+ description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
3089
+ creator_organization_name: OpenAI
3090
+ access: limited
3091
+ release_date: 2024-12-17
3092
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3093
+
3094
+ # GPT-4V
3095
+
2281
3096
  - name: openai/gpt-4-vision-preview
2282
3097
  # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
2283
3098
  display_name: GPT-4V (1106 preview)
@@ -2295,7 +3110,64 @@ models:
2295
3110
  release_date: 2023-11-06
2296
3111
  tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2297
3112
 
3113
+ ## GPT-4.5
3114
+ - name: openai/gpt-4.5-preview-2025-02-27
3115
+ display_name: GPT-4.5 (2025-02-27 preview)
3116
+ description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). ([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
3117
+ creator_organization_name: OpenAI
3118
+ access: limited
3119
+ release_date: 2025-02-27
3120
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3121
+
2298
3122
  ## o1 Models
3123
+ - name: openai/o1-pro-2025-03-19
3124
+ display_name: o1 pro (2025-03-19)
3125
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
3126
+ creator_organization_name: OpenAI
3127
+ access: limited
3128
+ release_date: 2025-03-19
3129
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3130
+
3131
+ - name: openai/o1-pro-2025-03-19-low-reasoning-effort
3132
+ display_name: o1 pro (2025-03-19, low reasoning effort)
3133
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to low.
3134
+ creator_organization_name: OpenAI
3135
+ access: limited
3136
+ release_date: 2025-03-19
3137
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3138
+
3139
+ - name: openai/o1-pro-2025-03-19-high-reasoning-effort
3140
+ display_name: o1 pro (2025-03-19, high reasoning effort)
3141
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to high.
3142
+ creator_organization_name: OpenAI
3143
+ access: limited
3144
+ release_date: 2025-03-19
3145
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3146
+
3147
+ - name: openai/o1-2024-12-17
3148
+ display_name: o1 (2024-12-17)
3149
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
3150
+ creator_organization_name: OpenAI
3151
+ access: limited
3152
+ release_date: 2024-12-17
3153
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3154
+
3155
+ - name: openai/o1-2024-12-17-low-reasoning-effort
3156
+ display_name: o1 (2024-12-17, low reasoning effort)
3157
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to low.
3158
+ creator_organization_name: OpenAI
3159
+ access: limited
3160
+ release_date: 2024-12-17
3161
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3162
+
3163
+ - name: openai/o1-2024-12-17-high-reasoning-effort
3164
+ display_name: o1 (2024-12-17, high reasoning effort)
3165
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The request's reasoning effort parameter is set to high.
3166
+ creator_organization_name: OpenAI
3167
+ access: limited
3168
+ release_date: 2024-12-17
3169
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3170
+
2299
3171
  - name: openai/o1-preview-2024-09-12
2300
3172
  display_name: o1-preview (2024-09-12)
2301
3173
  description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
@@ -2312,6 +3184,78 @@ models:
2312
3184
  release_date: 2024-09-12
2313
3185
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2314
3186
 
3187
+ - name: openai/o3-mini-2025-01-31
3188
+ display_name: o3-mini (2025-01-31)
3189
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
3190
+ creator_organization_name: OpenAI
3191
+ access: limited
3192
+ release_date: 2025-01-31
3193
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3194
+
3195
+ - name: openai/o3-mini-2025-01-31-low-reasoning-effort
3196
+ display_name: o3-mini (2025-01-31, low reasoning effort)
3197
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The request's reasoning effort parameter is set to low.
3198
+ creator_organization_name: OpenAI
3199
+ access: limited
3200
+ release_date: 2025-01-31
3201
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3202
+
3203
+ - name: openai/o3-mini-2025-01-31-high-reasoning-effort
3204
+ display_name: o3-mini (2025-01-31, high reasoning effort)
3205
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The request's reasoning effort parameter is set to high.
3206
+ creator_organization_name: OpenAI
3207
+ access: limited
3208
+ release_date: 2025-01-31
3209
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3210
+
3211
+ - name: openai/o3-2025-04-16
3212
+ display_name: o3 (2025-04-16)
3213
+ description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3214
+ creator_organization_name: OpenAI
3215
+ access: limited
3216
+ release_date: 2025-04-16
3217
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3218
+
3219
+ - name: openai/o3-2025-04-16-low-reasoning-effort
3220
+ display_name: o3 (2025-04-16, low reasoning effort)
3221
+ description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3222
+ creator_organization_name: OpenAI
3223
+ access: limited
3224
+ release_date: 2025-04-16
3225
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3226
+
3227
+ - name: openai/o3-2025-04-16-high-reasoning-effort
3228
+ display_name: o3 (2025-04-16, high reasoning effort)
3229
+ description: o3 is a reasoning model for math, science, coding, and visual reasoning tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3230
+ creator_organization_name: OpenAI
3231
+ access: limited
3232
+ release_date: 2025-04-16
3233
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3234
+
3235
+ - name: openai/o4-mini-2025-04-16
3236
+ display_name: o4-mini (2025-04-16)
3237
+ description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3238
+ creator_organization_name: OpenAI
3239
+ access: limited
3240
+ release_date: 2025-04-16
3241
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3242
+
3243
+ - name: openai/o4-mini-2025-04-16-low-reasoning-effort
3244
+ display_name: o4-mini (2025-04-16, low reasoning effort)
3245
+ description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3246
+ creator_organization_name: OpenAI
3247
+ access: limited
3248
+ release_date: 2025-04-16
3249
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3250
+
3251
+ - name: openai/o4-mini-2025-04-16-high-reasoning-effort
3252
+ display_name: o4-mini (2025-04-16, high reasoning effort)
3253
+ description: o4-mini is an o-series model optimized for fast, effective reasoning with exceptionally efficient performance in coding and visual tasks. ([blog post](https://openai.com/index/introducing-o3-and-o4-mini/))
3254
+ creator_organization_name: OpenAI
3255
+ access: limited
3256
+ release_date: 2025-04-16
3257
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3258
+
2315
3259
  ## Codex Models
2316
3260
  # DEPRECATED: Codex models have been shut down on March 23 2023.
2317
3261
 
@@ -2556,6 +3500,47 @@ models:
2556
3500
  release_date: 2024-06-07
2557
3501
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2558
3502
 
3503
+ - name: qwen/qwen2.5-7b-instruct-turbo
3504
+ display_name: Qwen2.5 Instruct Turbo (7B)
3505
+ description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3506
+ creator_organization_name: Qwen
3507
+ access: open
3508
+ release_date: 2024-09-19
3509
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3510
+
3511
+ - name: qwen/qwen2.5-7b-instruct
3512
+ display_name: Qwen2.5 Instruct (7B)
3513
+ description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/))
3514
+ creator_organization_name: Qwen
3515
+ access: open
3516
+ release_date: 2024-09-19
3517
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3518
+
3519
+ - name: qwen/qwen2.5-72b-instruct-turbo
3520
+ display_name: Qwen2.5 Instruct Turbo (72B)
3521
+ description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structured data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3522
+ creator_organization_name: Qwen
3523
+ access: open
3524
+ release_date: 2024-09-19
3525
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3526
+
3527
+ - name: qwen/qwen3-235b-a22b-fp8-tput
3528
+ display_name: Qwen3 235B A22B FP8 Throughput
3529
+ description: Qwen3 235B A22B FP8 Throughput is a hybrid instruct and reasoning mixture-of-experts model ([blog](https://qwenlm.github.io/blog/qwen3/)).
3530
+ creator_organization_name: Qwen
3531
+ access: open
3532
+ release_date: 2025-04-29
3533
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3534
+
3535
+ - name: qwen/qwq-32b-preview
3536
+ display_name: QwQ (32B Preview)
3537
+ description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
3538
+ creator_organization_name: Alibaba Cloud
3539
+ access: open
3540
+ num_parameters: 32800000000
3541
+ release_date: 2024-11-28
3542
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3543
+
2559
3544
  - name: qwen/qwen-vl
2560
3545
  display_name: Qwen-VL
2561
3546
  description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
@@ -2572,6 +3557,78 @@ models:
2572
3557
  release_date: 2023-08-24
2573
3558
  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2574
3559
 
3560
+ - name: qwen/qwen2-vl-7b-instruct
3561
+ display_name: Qwen2-VL Instruct (7B)
3562
+ description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
3563
+ creator_organization_name: Alibaba Group
3564
+ access: open
3565
+ release_date: 2024-08-29
3566
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3567
+
3568
+ - name: qwen/qwen2-vl-72b-instruct
3569
+ display_name: Qwen2-VL Instruct (72B)
3570
+ description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
3571
+ creator_organization_name: Alibaba Group
3572
+ access: open
3573
+ release_date: 2024-08-29
3574
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3575
+
3576
+ - name: qwen/qwen2.5-vl-3b-instruct
3577
+ display_name: Qwen2.5-VL Instruct (3B)
3578
+ description: Qwen2.5-VL is the latest generation of visual multimodal models in the Qwen series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3579
+ creator_organization_name: Alibaba Group
3580
+ access: open
3581
+ release_date: 2025-01-26
3582
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3583
+
3584
+ - name: qwen/qwen2.5-vl-7b-instruct
3585
+ display_name: Qwen2.5-VL Instruct (7B)
3586
+ description: Qwen2.5-VL is the latest generation of visual multimodal models in the Qwen series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3587
+ creator_organization_name: Alibaba Group
3588
+ access: open
3589
+ release_date: 2025-01-26
3590
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3591
+
3592
+ - name: qwen/qwen2.5-vl-32b-instruct
3593
+ display_name: Qwen2.5-VL Instruct (32B)
3594
+ description: Qwen2.5-VL is the latest generation of visual multimodal models in the Qwen series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3595
+ creator_organization_name: Alibaba Group
3596
+ access: open
3597
+ release_date: 2025-01-26 # NOTE(review): the 32B variant was announced later than the other Qwen2.5-VL sizes — verify this date
3598
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3599
+
3600
+ - name: qwen/qwen2.5-vl-72b-instruct
3601
+ display_name: Qwen2.5-VL Instruct (72B)
3602
+ description: Qwen2.5-VL is the latest generation of visual multimodal models in the Qwen series ([blog](https://qwenlm.github.io/blog/qwen2.5-vl/)).
3603
+ creator_organization_name: Alibaba Group
3604
+ access: open
3605
+ release_date: 2025-01-26
3606
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3607
+
3608
+ - name: qwen/qwen-audio-chat
3609
+ display_name: Qwen-Audio Chat
3610
+ description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
3611
+ creator_organization_name: Alibaba Cloud
3612
+ access: open
3613
+ release_date: 2023-11-14
3614
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3615
+
3616
+ - name: qwen/qwen2-audio-7b-instruct
3617
+ display_name: Qwen2-Audio Instruct (7B)
3618
+ description: The second generation of the auditory multimodal models in the Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)).
3619
+ creator_organization_name: Alibaba Cloud
3620
+ access: open
3621
+ release_date: 2024-07-15
3622
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3623
+
3624
+ - name: qwen/qwen2.5-omni-7b
3625
+ display_name: Qwen2.5-Omni (7B)
3626
+ description: The new flagship end-to-end multimodal model in the Qwen series that can process inputs including text, images, audio, and video ([paper](https://arxiv.org/abs/2503.20215)).
3627
+ creator_organization_name: Alibaba Cloud
3628
+ access: open
3629
+ release_date: 2025-03-27
3630
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3631
+
2575
3632
  # SAIL (Sea AI Lab)
2576
3633
  - name: sail/sailor-7b
2577
3634
  display_name: Sailor (7B)
@@ -2617,7 +3674,7 @@ models:
2617
3674
  access: open
2618
3675
  num_parameters: 16000000000
2619
3676
  release_date: 2022-03-25
2620
- tags: [] # TODO: add tags
3677
+ tags: [UNSUPPORTED_MODEL_TAG]
2621
3678
 
2622
3679
  # SambaNova
2623
3680
  - name: sambanova/sambalingo-thai-base
@@ -2769,8 +3826,6 @@ models:
2769
3826
  release_date: 2023-04-20
2770
3827
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2771
3828
 
2772
-
2773
-
2774
3829
  # Stanford
2775
3830
  - name: stanford/alpaca-7b
2776
3831
  display_name: Alpaca (7B)
@@ -2866,7 +3921,7 @@ models:
2866
3921
  access: open
2867
3922
  num_parameters: 3000000000
2868
3923
  release_date: 2023-05-05
2869
- tafs: [] # TODO: add tags
3924
+ tags: [UNSUPPORTED_MODEL_TAG]
2870
3925
 
2871
3926
  - name: together/redpajama-incite-base-7b
2872
3927
  display_name: RedPajama-INCITE-Base (7B)
@@ -2917,9 +3972,27 @@ models:
2917
3972
  access: open
2918
3973
  num_parameters: 13000000000
2919
3974
  release_date: 2022-09-19
2920
- tags: [] # TODO: add tags
3975
+ tags: [UNSUPPORTED_MODEL_TAG]
2921
3976
 
3977
+ # Upstage
3978
+ - name: upstage/solar-pro-preview-instruct
3979
+ display_name: Solar Pro Preview (22B)
3980
+ description: Solar Pro Preview (22B) is open-weights model for single GPU inference that is a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)).
3981
+ creator_organization_name: Upstage
3982
+ access: open
3983
+ num_parameters: 22000000000
3984
+ release_date: 2024-09-11
3985
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2922
3986
 
3987
+ - name: upstage/solar-pro-241126
3988
+ display_name: Solar Pro
3990
+ description: Solar Pro is a LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws)).
3991
+ creator_organization_name: Upstage
3992
+ access: limited
3993
+ num_parameters: 22000000000
3994
+ release_date: 2024-11-26
3995
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2923
3996
 
2924
3997
  # Writer
2925
3998
  - name: writer/palmyra-base
@@ -3030,6 +4103,14 @@ models:
3030
4103
  release_date: 2024-09-12
3031
4104
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3032
4105
 
4106
+ - name: writer/palmyra-x5
4107
+ display_name: Palmyra X5
4108
+ description: Palmyra X5 is a language model for enterprise that uses a Mixture of Experts (MoE) architecture and a hybrid attention mechanism that blends linear and softmax attention. ([blog](https://writer.com/engineering/long-context-palmyra-x5/))
4109
+ creator_organization_name: Writer
4110
+ access: limited
4111
+ release_date: 2025-04-28 # Palmyra X5 launched April 2025; the linked blog post is from 2025
4112
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4113
+
3033
4114
  - name: writer/palmyra-med-32k
3034
4115
  display_name: Palmyra-Med 32K (70B)
3035
4116
  description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
@@ -3040,11 +4121,10 @@ models:
3040
4121
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3041
4122
 
3042
4123
  - name: writer/palmyra-med
3043
- display_name: Palmyra-Med (70B)
3044
- description: Palmyra-Med (70B) is a model finetuned from Palmyra-X-003 intended for medical applications.
4124
+ display_name: Palmyra Med
4125
+ description: Palmyra Med is a model intended for medical applications.
3045
4126
  creator_organization_name: Writer
3046
4127
  access: open
3047
- num_parameters: 70600000000
3048
4128
  release_date: 2024-07-31
3049
4129
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3050
4130
 
@@ -3057,6 +4137,32 @@ models:
3057
4137
  release_date: 2024-07-31
3058
4138
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3059
4139
 
4140
+ - name: writer/palmyra-fin
4141
+ display_name: Palmyra Fin
4142
+ description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
4143
+ creator_organization_name: Writer
4144
+ access: limited
4145
+ release_date: 2024-07-31
4146
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4147
+
4148
+ # xAI
4149
+
4150
+ - name: xai/grok-3-beta
4151
+ display_name: Grok 3 Beta
4152
+ description: Grok 3 Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
4153
+ creator_organization_name: xAI
4154
+ access: limited
4155
+ release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
4156
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4157
+
4158
+ - name: xai/grok-3-mini-beta
4159
+ display_name: Grok 3 mini Beta
4160
+ description: Grok 3 mini Beta is a model trained on xAI's Colossus supercluster with significant improvements in reasoning, mathematics, coding, world knowledge, and instruction-following tasks. ([blog](https://x.ai/news/grok-3))
4161
+ creator_organization_name: xAI
4162
+ access: limited
4163
+ release_date: 2025-04-03 # https://docs.x.ai/docs/release-notes#april-2025
4164
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4165
+
3060
4166
  # Yandex
3061
4167
  - name: yandex/yalm
3062
4168
  display_name: YaLM (100B)
@@ -3128,3 +4234,452 @@ models:
3128
4234
  release_date: 2024-04-18
3129
4235
  tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
3130
4236
 
4237
+ # Diva Llama
4238
+ - name: stanford/diva-llama
4239
+ display_name: Diva Llama 3 (8B)
4240
+ description: Diva Llama 3 is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It was trained using distillation loss. ([paper](https://arxiv.org/abs/2410.02678))
4241
+ creator_organization_name: Stanford
4242
+ access: open
4243
+ num_parameters: 8000000000
4244
+ release_date: 2024-10-03
4245
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
4246
+
4247
+
4248
+ # LLaMA-Omni
4249
+ - name: ictnlp/llama-3.1-8b-omni
4250
+ display_name: LLaMA-Omni (8B)
4251
+ description: The speech-language multimodal version of the LLaMA 3.1 model, supporting speech interaction ([paper](https://arxiv.org/abs/2409.06666)).
4252
+ creator_organization_name: ICTNLP
4253
+ access: open
4254
+ num_parameters: 8000000000
4255
+ release_date: 2024-09-10
4256
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
4257
+
4258
+
4259
+ # Maritaca AI
4260
+ - name: maritaca-ai/sabia-7b
4261
+ display_name: Sabia 7B
4262
+ description: Sabia 7B
4263
+ creator_organization_name: Maritaca AI
4264
+ access: open
4265
+ num_parameters: 6740000000
4266
+ release_date: 2023-11-08
4267
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4268
+
4269
+ # Granite - IBM
4270
+ # https://www.ibm.com/granite
4271
+ # https://github.com/ibm-granite/granite-3.0-language-models
4272
+
4273
+ - name: ibm-granite/granite-3.0-2b-base
4274
+ display_name: Granite 3.0 base (2B)
4275
+ description: Granite-3.0-2B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
4276
+ creator_organization_name: IBM
4277
+ access: open
4278
+ num_parameters: 2530000000
4279
+ release_date: 2024-10-21
4280
+ tags: [TEXT_MODEL_TAG]
4281
+
4282
+ - name: ibm-granite/granite-3.0-2b-instruct
4283
+ display_name: Granite 3.0 Instruct (2B)
4284
+ description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
4285
+ creator_organization_name: IBM
4286
+ access: open
4287
+ num_parameters: 2630000000
4288
+ release_date: 2024-10-21
4289
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4290
+
4291
+ - name: ibm-granite/granite-3.0-8b-instruct
4292
+ display_name: Granite 3.0 instruct (8B)
4293
+ description: Granite-3.0-8B-Instruct is a 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
4294
+ creator_organization_name: IBM
4295
+ access: open
4296
+ num_parameters: 8170000000
4297
+ release_date: 2024-10-21
4298
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4299
+
4300
+ - name: ibm-granite/granite-3.0-8b-base
4301
+ display_name: Granite 3.0 base (8B)
4302
+ description: Granite-3.0-8B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
4303
+ creator_organization_name: IBM
4304
+ access: open
4305
+ num_parameters: 8170000000
4306
+ release_date: 2024-10-21
4307
+ tags: [TEXT_MODEL_TAG]
4308
+
4309
+ - name: ibm-granite/granite-3.0-3b-a800m-instruct
4310
+ display_name: Granite 3.0 A800M instruct (3B)
4311
+ description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
4312
+ creator_organization_name: IBM
4313
+ access: open
4314
+ num_parameters: 3370000000
4315
+ release_date: 2024-10-21
4316
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4317
+
4318
+ - name: ibm-granite/granite-3.0-3b-a800m-base
4319
+ display_name: Granite 3.0 A800M base (3B)
4320
+ description: Granite-3.0-3B-A800M-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
4321
+ creator_organization_name: IBM
4322
+ access: open
4323
+ num_parameters: 3370000000
4324
+ release_date: 2024-10-21
4325
+ tags: [TEXT_MODEL_TAG]
4326
+
4327
+ - name: ibm-granite/granite-3.0-1b-a400m-instruct
4328
+ display_name: Granite 3.0 A400M instruct (1B)
4329
+ description: Granite-3.0-1B-A400M-Instruct is a 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
4330
+ creator_organization_name: IBM
4331
+ access: open
4332
+ num_parameters: 1330000000
4333
+ release_date: 2024-10-21
4334
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4335
+
4336
+ - name: ibm-granite/granite-3.0-1b-a400m-base
4337
+ display_name: Granite 3.0 A400M base (1B)
4338
+ description: Granite-3.0-1B-A400M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy.
4339
+ creator_organization_name: IBM
4340
+ access: open
4341
+ num_parameters: 1380000000
4342
+ release_date: 2024-10-21
4343
+ tags: [TEXT_MODEL_TAG]
4344
+
4345
+ - name: ibm-granite/granite-3.1-8b-base
4346
+ display_name: Granite 3.1 - 8B - Base
4347
+ description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
4348
+ creator_organization_name: IBM
4349
+ access: open
4350
+ num_parameters: 8170000000
4351
+ release_date: 2024-12-18
4352
+ tags: [TEXT_MODEL_TAG]
4353
+
4354
+ - name: ibm-granite/granite-3.1-8b-instruct
4355
+ display_name: Granite 3.1 - 8B - Instruct
4356
+ description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4357
+ creator_organization_name: IBM
4358
+ access: open
4359
+ num_parameters: 8170000000
4360
+ release_date: 2024-12-18
4361
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4362
+
4363
+ - name: ibm-granite/granite-3.1-2b-instruct
4364
+ display_name: Granite 3.1 - 2B - Instruct
4365
+ description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4366
+ creator_organization_name: IBM
4367
+ access: open
4368
+ num_parameters: 2530000000
4369
+ release_date: 2024-12-18
4370
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4371
+
4372
+ - name: ibm-granite/granite-3.1-2b-base
4373
+ display_name: Granite 3.1 - 2B - Base
4374
+ description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
4375
+ creator_organization_name: IBM
4376
+ access: open
4377
+ num_parameters: 2530000000
4378
+ release_date: 2024-12-18
4379
+ tags: [TEXT_MODEL_TAG]
4380
+
4381
+ - name: ibm-granite/granite-3.1-3b-a800m-instruct
4382
+ display_name: Granite 3.1 - 3B - A800M - Instruct
4383
+ description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4384
+ creator_organization_name: IBM
4385
+ access: open
4386
+ num_parameters: 3300000000
4387
+ release_date: 2024-12-18
4388
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4389
+
4390
+ - name: ibm-granite/granite-3.1-3b-a800m-base
4391
+ display_name: Granite 3.1 - 3B - A800M - Base
4392
+ description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
4393
+ creator_organization_name: IBM
4394
+ access: open
4395
+ num_parameters: 3300000000
4396
+ release_date: 2024-12-18
4397
+ tags: [TEXT_MODEL_TAG]
4398
+
4399
+ - name: ibm-granite/granite-3.1-1b-a400m-instruct
4400
+ display_name: Granite 3.1 - 1B - A400M - Instruct
4401
+ description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4402
+ creator_organization_name: IBM
4403
+ access: open
4404
+ num_parameters: 1330000000
4405
+ release_date: 2024-12-18
4406
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4407
+
4408
+ - name: ibm-granite/granite-3.1-1b-a400m-base
4409
+ display_name: Granite 3.1 - 1B - A400M - Base
4410
+ description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
4411
+ creator_organization_name: IBM
4412
+ access: open
4413
+ num_parameters: 1330000000
4414
+ release_date: 2024-12-18
4415
+ tags: [TEXT_MODEL_TAG]
4416
+
4417
+ - name: ibm/granite-13b-instruct-v2
4418
+ display_name: Granite 13b instruct v2
4419
+ description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer language model.
4420
+ creator_organization_name: IBM
4421
+ access: limited
4422
+ num_parameters: 13000000000
4423
+ release_date: 2023-11-30
4424
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4425
+
4426
+ - name: ibm/granite-20b-code-instruct-8k
4427
+ display_name: Granite 20b code instruct (8K)
4428
+ description: Granite-20B-Code-Base-8K is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions.
4429
+ creator_organization_name: IBM
4430
+ access: limited
4431
+ num_parameters: 20000000000
4432
+ release_date: 2024-04-18 # original value "2024-18-4" had an impossible month; description matches the April 18, 2024 release
4433
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4434
+
4435
+ - name: ibm/granite-34b-code-instruct
4436
+ display_name: Granite 34b code instruct
4437
+ description: Granite Base (34B) Code Instruct is a 34B parameter model fine tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
4438
+ creator_organization_name: IBM
4439
+ access: open
4440
+ num_parameters: 34000000000
4441
+ release_date: 2024-06-05
4442
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4443
+
4444
+
4445
+ - name: ibm/granite-3b-code-instruct
4446
+ display_name: Granite 3b code instruct
4447
+ description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
4448
+ creator_organization_name: IBM
4449
+ access: open
4450
+ num_parameters: 3000000000
4451
+ release_date: 2024-06-18
4452
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4453
+
4454
+ - name: ibm/granite-8b-code-instruct
4455
+ display_name: Granite 8b code instruct
4456
+ description: Granite-8B-Code-Instruct-128K is a 8B parameter long-context instruct model fine tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
4457
+ creator_organization_name: IBM
4458
+ access: open
4459
+ num_parameters: 8000000000
4460
+ release_date: 2024-06-18
4461
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4462
+
4463
+ - name: ibm/granite-3.1-8b-instruct
4464
+ display_name: Granite 3.1 - 8B - Instruct
4465
+ description: Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4466
+ creator_organization_name: IBM
4467
+ access: open
4468
+ num_parameters: 8170000000
4469
+ release_date: 2024-12-18
4470
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4471
+
4472
+ - name: ibm/granite-3.1-2b-instruct
4473
+ display_name: Granite 3.1 - 2B - Instruct
4474
+ description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4475
+ creator_organization_name: IBM
4476
+ access: open
4477
+ num_parameters: 2530000000
4478
+ release_date: 2024-12-18
4479
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4480
+
4481
+ - name: ibm/granite-3.3-8b-instruct
4482
+ display_name: Granite 3.3 8B Instruct
4483
+ description: Granite 3.3 8B Instruct is a 8-billion parameter 128K context length language model fine-tuned for improved reasoning and instruction-following capabilities. ([model card](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct))
4484
+ creator_organization_name: IBM
4485
+ access: open
4486
+ num_parameters: 8170000000
4487
+ release_date: 2025-04-16
4488
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4489
+
4490
+ - name: mistralai/mixtral-8x7b-instruct-v0:1
4491
+ display_name: Mixtral 8x7B Instruct on IBM WatsonX
4492
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
4493
+ creator_organization_name: Mistral
4494
+ access: limited
4495
+ release_date: 2023-12-11
4496
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4497
+
4498
+ - name: ura-hcmut/ura-llama-2.1-8b
4499
+ display_name: URA-Llama 2.1 (8B)
4500
+ description: URA-Llama 2.1 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4501
+ creator_organization_name: URA
4502
+ access: open
4503
+ num_parameters: 8000000000
4504
+ release_date: 2024-08-04
4505
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4506
+
4507
+ - name: ura-hcmut/ura-llama-2-8b
4508
+ display_name: URA-Llama 2 (8B)
4509
+ description: URA-Llama 2 (8B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4510
+ creator_organization_name: URA
4511
+ access: open
4512
+ num_parameters: 8000000000
4513
+ release_date: 2024-08-04
4514
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4515
+
4516
+ - name: ura-hcmut/ura-llama-7b
4517
+ display_name: URA-Llama 7B (7B)
4518
+ description: URA-Llama 7B (7B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4519
+ creator_organization_name: URA
4520
+ access: open
4521
+ num_parameters: 7000000000
4522
+ release_date: 2023-10-10
4523
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4524
+
4525
+ - name: ura-hcmut/ura-llama-13b
4526
+ display_name: URA-Llama 13B (13B)
4527
+ description: URA-Llama 13B (13B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4528
+ creator_organization_name: URA
4529
+ access: open
4530
+ num_parameters: 13000000000
4531
+ release_date: 2023-10-10
4532
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4533
+
4534
+ - name: ura-hcmut/ura-llama-70b
4535
+ display_name: URA-Llama 70B (70B)
4536
+ description: URA-Llama 70B (70B) is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4537
+ creator_organization_name: URA
4538
+ access: open
4539
+ num_parameters: 70000000000
4540
+ release_date: 2023-10-10
4541
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4542
+
4543
+ - name: ura-hcmut/GemSUra-7B
4544
+ display_name: GemSUra 7B
4545
+ description: GemSUra 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4546
+ creator_organization_name: URA
4547
+ access: open
4548
+ num_parameters: 7000000000
4549
+ release_date: 2024-03-12
4550
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4551
+
4552
+ - name: ura-hcmut/GemSUra-2B
4553
+ display_name: GemSUra 2B
4554
+ description: GemSUra 2B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4555
+ creator_organization_name: URA
4556
+ access: open
4557
+ num_parameters: 2000000000
4558
+ release_date: 2024-03-12
4559
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4560
+
4561
+ - name: ura-hcmut/MixSUra
4562
+ display_name: MixSUra
4563
+          description: MixSUra is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text. It is a sparse mixture-of-experts model with 8 experts, 2 of which are active per token.
4564
+ creator_organization_name: URA
4565
+ access: open
4566
+ num_parameters: 46700000000
4567
+ release_date: 2024-03-12
4568
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4569
+
4570
+ - name: vilm/vinallama-7b-chat
4571
+ display_name: VinaLLaMa
4572
+ description: VinaLLaMa is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4573
+ creator_organization_name: ViLM
4574
+ access: open
4575
+ num_parameters: 7000000000
4576
+ release_date: 2024-03-12
4577
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4578
+
4579
+ - name: vilm/vinallama-2.7b-chat
4580
+ display_name: VinaLLaMa 2.7B
4581
+ description: VinaLLaMa 2.7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4582
+ creator_organization_name: ViLM
4583
+ access: open
4584
+ num_parameters: 2700000000
4585
+ release_date: 2024-03-12
4586
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4587
+
4588
+ - name: vilm/vietcuna-7b-v3
4589
+ display_name: VietCuna 7B (v3)
4590
+ description: VietCuna 7B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4591
+ creator_organization_name: ViLM
4592
+ access: open
4593
+ num_parameters: 7000000000
4594
+ release_date: 2023-08-07
4595
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4596
+
4597
+ - name: vilm/vietcuna-3b-v2
4598
+ display_name: VietCuna 3B (v2)
4599
+ description: VietCuna 3B is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4600
+ creator_organization_name: ViLM
4601
+ access: open
4602
+ num_parameters: 3000000000
4603
+ release_date: 2023-07-15
4604
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4605
+
4606
+ - name: vilm/Quyen-v0.1
4607
+ display_name: Quyen (v0.1)
4608
+ description: Quyen is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4609
+ creator_organization_name: ViLM
4610
+ access: open
4611
+ num_parameters: 4000000000
4612
+ release_date: 2024-02-26
4613
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4614
+
4615
+ - name: vilm/Quyen-Plus-v0.1
4616
+ display_name: Quyen Plus (v0.1)
4617
+ description: Quyen Plus is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4618
+ creator_organization_name: ViLM
4619
+ access: open
4620
+ num_parameters: 7000000000
4621
+ release_date: 2024-02-26
4622
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4623
+
4624
+ - name: vilm/Quyen-Pro-v0.1
4625
+ display_name: Quyen Pro (v0.1)
4626
+ description: Quyen Pro is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4627
+ creator_organization_name: ViLM
4628
+ access: open
4629
+ num_parameters: 14000000000
4630
+ release_date: 2024-02-26
4631
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4632
+
4633
+ - name: vilm/Quyen-Pro-Max-v0.1
4634
+ display_name: Quyen Pro Max (v0.1)
4635
+ description: Quyen Pro Max is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4636
+ creator_organization_name: ViLM
4637
+ access: open
4638
+ num_parameters: 72000000000
4639
+ release_date: 2024-02-26
4640
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4641
+
4642
+ - name: vilm/Quyen-Mini-v0.1
4643
+ display_name: Quyen Mini (v0.1)
4644
+ description: Quyen Mini is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4645
+ creator_organization_name: ViLM
4646
+ access: open
4647
+ num_parameters: 1800000000
4648
+ release_date: 2024-02-26
4649
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4650
+
4651
+ - name: vilm/Quyen-SE-v0.1
4652
+ display_name: Quyen SE (v0.1)
4653
+ description: Quyen SE is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4654
+ creator_organization_name: ViLM
4655
+ access: open
4656
+ num_parameters: 500000000
4657
+ release_date: 2024-02-26
4658
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4659
+
4660
+ - name: Viet-Mistral/Vistral-7B-Chat
4661
+ display_name: Vistral 7B Chat
4662
+ description: Vistral 7B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4663
+ creator_organization_name: Viet-Mistral
4664
+ access: open
4665
+ num_parameters: 7000000000
4666
+ release_date: 2024-02-28
4667
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4668
+
4669
+ - name: vinai/PhoGPT-7B5-Instruct
4670
+ display_name: PhoGPT 7B5 Instruct
4671
+ description: PhoGPT 7B5 Instruct is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4672
+ creator_organization_name: VinAI
4673
+ access: open
4674
+ num_parameters: 7500000000
4675
+ release_date: 2024-02-19
4676
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
4677
+
4678
+ - name: vinai/PhoGPT-4B-Chat
4679
+ display_name: PhoGPT 4B Chat
4680
+ description: PhoGPT 4B Chat is a model trained on a large corpus of Vietnamese text data, including books, articles, and websites. It is designed to understand and generate Vietnamese text.
4681
+ creator_organization_name: VinAI
4682
+ access: open
4683
+ num_parameters: 4000000000
4684
+ release_date: 2024-04-02
4685
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]