crfm-helm 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic; see the package's registry page for more details.

Files changed (580)
  1. crfm_helm-0.5.5.dist-info/METADATA +413 -0
  2. crfm_helm-0.5.5.dist-info/RECORD +894 -0
  3. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +13 -1
  5. helm/benchmark/adaptation/adapters/adapter_factory.py +15 -1
  6. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -1
  7. helm/benchmark/adaptation/adapters/chat_adapter.py +49 -0
  8. helm/benchmark/adaptation/adapters/ehr_instruction_adapter.py +108 -0
  9. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -1
  10. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +1 -1
  11. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +1 -1
  12. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +4 -2
  13. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -1
  14. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +1 -1
  15. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +4 -2
  16. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +1 -1
  17. helm/benchmark/adaptation/adapters/multiple_choice_calibrated_adapter.py +1 -1
  18. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +2 -2
  19. helm/benchmark/adaptation/adapters/multiple_choice_joint_chain_of_thought_adapter.py +87 -0
  20. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -1
  21. helm/benchmark/adaptation/adapters/test_generation_adapter.py +3 -3
  22. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +2 -2
  23. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +2 -2
  24. helm/benchmark/adaptation/common_adapter_specs.py +69 -4
  25. helm/benchmark/adaptation/prompt.py +1 -1
  26. helm/benchmark/annotation/aci_bench_annotator.py +95 -0
  27. helm/benchmark/annotation/air_bench_annotator.py +20 -5
  28. helm/benchmark/annotation/annotator.py +5 -0
  29. helm/benchmark/annotation/annotator_factory.py +3 -20
  30. helm/benchmark/annotation/autobencher_capabilities_annotator.py +107 -0
  31. helm/benchmark/annotation/autobencher_safety_annotator.py +98 -0
  32. helm/benchmark/annotation/bigcodebench_annotator.py +108 -0
  33. helm/benchmark/annotation/bird_sql_annotator.py +58 -0
  34. helm/benchmark/annotation/chw_care_plan_annotator.py +98 -0
  35. helm/benchmark/annotation/czech_bank_qa_annotator.py +78 -0
  36. helm/benchmark/annotation/dischargeme_annotator.py +107 -0
  37. helm/benchmark/annotation/ehr_sql_annotator.py +87 -0
  38. helm/benchmark/annotation/helpdesk_call_summarization_annotator.py +131 -0
  39. helm/benchmark/annotation/image2struct/image_compiler_annotator.py +6 -1
  40. helm/benchmark/annotation/live_qa_annotator.py +1 -1
  41. helm/benchmark/annotation/med_dialog_annotator.py +99 -0
  42. helm/benchmark/annotation/medalign_annotator.py +100 -0
  43. helm/benchmark/annotation/medi_qa_annotator.py +98 -0
  44. helm/benchmark/annotation/medication_qa_annotator.py +87 -63
  45. helm/benchmark/annotation/mental_health_annotator.py +98 -0
  46. helm/benchmark/annotation/mimic_rrs_annotator.py +100 -0
  47. helm/benchmark/annotation/model_as_judge.py +218 -6
  48. helm/benchmark/annotation/mtsamples_procedures_annotator.py +98 -0
  49. helm/benchmark/annotation/mtsamples_replicate_annotator.py +101 -0
  50. helm/benchmark/annotation/omni_math/gpt_evaluation_template.txt +152 -0
  51. helm/benchmark/annotation/omni_math/gpt_evaluation_zero_shot_template.txt +36 -0
  52. helm/benchmark/annotation/omni_math_annotator.py +132 -0
  53. helm/benchmark/annotation/spider_annotator.py +18 -0
  54. helm/benchmark/annotation/starr_patient_instructions_annotator.py +98 -0
  55. helm/benchmark/annotation/wildbench/eval_template.pairwise.v2.md +75 -0
  56. helm/benchmark/annotation/wildbench/eval_template.score.v2.md +66 -0
  57. helm/benchmark/annotation/wildbench_annotator.py +119 -0
  58. helm/benchmark/annotation_executor.py +35 -15
  59. helm/benchmark/augmentations/cleva_perturbation.py +9 -8
  60. helm/benchmark/augmentations/contraction_expansion_perturbation.py +2 -2
  61. helm/benchmark/augmentations/contrast_sets_perturbation.py +2 -2
  62. helm/benchmark/augmentations/dialect_perturbation.py +4 -5
  63. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  64. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  65. helm/benchmark/augmentations/gender_perturbation.py +2 -2
  66. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  67. helm/benchmark/augmentations/mild_mix_perturbation.py +6 -6
  68. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  69. helm/benchmark/augmentations/person_name_perturbation.py +4 -5
  70. helm/benchmark/augmentations/perturbation.py +1 -1
  71. helm/benchmark/augmentations/space_perturbation.py +2 -2
  72. helm/benchmark/augmentations/suffix_perturbation.py +2 -2
  73. helm/benchmark/augmentations/synonym_perturbation.py +4 -3
  74. helm/benchmark/augmentations/test_perturbation.py +16 -13
  75. helm/benchmark/augmentations/translate_perturbation.py +2 -2
  76. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  77. helm/benchmark/data_preprocessor.py +2 -2
  78. helm/benchmark/huggingface_registration.py +2 -7
  79. helm/benchmark/metrics/aci_bench_metrics.py +34 -0
  80. helm/benchmark/metrics/basic_metrics.py +6 -6
  81. helm/benchmark/metrics/bbq_metrics.py +2 -2
  82. helm/benchmark/metrics/bias_metrics.py +12 -3
  83. helm/benchmark/metrics/bigcodebench_metrics.py +25 -0
  84. helm/benchmark/metrics/bird_sql_metrics.py +28 -0
  85. helm/benchmark/metrics/chw_care_plan_metrics.py +34 -0
  86. helm/benchmark/metrics/classification_metrics.py +76 -12
  87. helm/benchmark/metrics/cleva_harms_metrics.py +8 -7
  88. helm/benchmark/metrics/code_metrics.py +5 -5
  89. helm/benchmark/metrics/comet_metric.py +125 -0
  90. helm/benchmark/metrics/common_metric_specs.py +9 -2
  91. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +72 -0
  92. helm/benchmark/metrics/copyright_metrics.py +4 -4
  93. helm/benchmark/metrics/czech_bank_qa_metrics.py +29 -0
  94. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +2 -2
  95. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +2 -2
  96. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +2 -2
  97. helm/benchmark/metrics/dischargeme_metrics.py +34 -0
  98. helm/benchmark/metrics/disinformation_metrics.py +4 -4
  99. helm/benchmark/metrics/dry_run_metrics.py +5 -5
  100. helm/benchmark/metrics/efficiency_metrics.py +3 -3
  101. helm/benchmark/metrics/ehr_sql_metrics.py +103 -0
  102. helm/benchmark/metrics/evaluate_instances_metric.py +3 -3
  103. helm/benchmark/metrics/evaluate_reference_metrics.py +144 -16
  104. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +103 -0
  105. helm/benchmark/metrics/gpt4_audio_critique_metrics.py +167 -0
  106. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +36 -0
  107. helm/benchmark/metrics/ifeval/__init__.py +0 -0
  108. helm/benchmark/metrics/ifeval/instructions.py +1574 -0
  109. helm/benchmark/metrics/ifeval/instructions_registry.py +182 -0
  110. helm/benchmark/metrics/ifeval/instructions_registry.pyi +3 -0
  111. helm/benchmark/metrics/ifeval/instructions_util.py +153 -0
  112. helm/benchmark/metrics/ifeval_metrics.py +55 -0
  113. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +1 -1
  114. helm/benchmark/metrics/image_generation/detection_metrics.py +1 -1
  115. helm/benchmark/metrics/image_generation/detectors/vitdet.py +1 -1
  116. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +1 -1
  117. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +1 -1
  118. helm/benchmark/metrics/image_generation/nsfw_metrics.py +1 -1
  119. helm/benchmark/metrics/image_generation/q16/test_q16.py +3 -1
  120. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +1 -1
  121. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +2 -2
  122. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +1 -1
  123. helm/benchmark/metrics/image_generation/watermark_metrics.py +1 -1
  124. helm/benchmark/metrics/instruction_following_critique_metrics.py +4 -4
  125. helm/benchmark/metrics/language_modeling_metrics.py +4 -4
  126. helm/benchmark/metrics/machine_translation_metrics.py +2 -2
  127. helm/benchmark/metrics/med_dialog_metrics.py +34 -0
  128. helm/benchmark/metrics/medalign_metrics.py +34 -0
  129. helm/benchmark/metrics/medcalc_bench_metrics.py +124 -0
  130. helm/benchmark/metrics/medec_metrics.py +101 -0
  131. helm/benchmark/metrics/medi_qa_metrics.py +34 -0
  132. helm/benchmark/metrics/medication_qa_metrics.py +15 -4
  133. helm/benchmark/metrics/mental_health_metrics.py +34 -0
  134. helm/benchmark/metrics/metric.py +3 -3
  135. helm/benchmark/metrics/mimic_rrs_metrics.py +34 -0
  136. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +96 -0
  137. helm/benchmark/metrics/mtsamples_procedures_metrics.py +34 -0
  138. helm/benchmark/metrics/mtsamples_replicate_metrics.py +34 -0
  139. helm/benchmark/metrics/nltk_helper.py +32 -0
  140. helm/benchmark/metrics/numeracy_metrics.py +4 -4
  141. helm/benchmark/metrics/omni_math_metrics.py +32 -0
  142. helm/benchmark/metrics/output_processing_metric.py +60 -0
  143. helm/benchmark/metrics/output_processors.py +15 -0
  144. helm/benchmark/metrics/paraphrase_generation_metrics.py +2 -2
  145. helm/benchmark/metrics/ranking_metrics.py +3 -3
  146. helm/benchmark/metrics/reference_metric.py +3 -3
  147. helm/benchmark/metrics/{bhasa_metrics.py → seahelm_metrics.py} +3 -3
  148. helm/benchmark/metrics/seahelm_metrics_specs.py +10 -0
  149. helm/benchmark/metrics/spider_metrics.py +7 -0
  150. helm/benchmark/metrics/starr_patient_instructions_metrics.py +34 -0
  151. helm/benchmark/metrics/statistic.py +1 -1
  152. helm/benchmark/metrics/summac/model_summac.py +1 -1
  153. helm/benchmark/metrics/summarization_critique_metrics.py +4 -4
  154. helm/benchmark/metrics/summarization_metrics.py +19 -9
  155. helm/benchmark/metrics/test_bias_metrics.py +5 -1
  156. helm/benchmark/metrics/test_classification_metrics.py +140 -68
  157. helm/benchmark/metrics/test_evaluate_reference_metrics.py +15 -0
  158. helm/benchmark/metrics/test_metric.py +1 -1
  159. helm/benchmark/metrics/test_statistic.py +2 -2
  160. helm/benchmark/metrics/tokens/ai21_token_cost_estimator.py +1 -1
  161. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +6 -6
  162. helm/benchmark/metrics/tokens/cohere_token_cost_estimator.py +1 -1
  163. helm/benchmark/metrics/tokens/free_token_cost_estimator.py +1 -1
  164. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +1 -1
  165. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  166. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +1 -1
  167. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +1 -1
  168. helm/benchmark/metrics/toxicity_metrics.py +4 -4
  169. helm/benchmark/metrics/unitxt_metrics.py +4 -1
  170. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  171. helm/benchmark/metrics/wildbench_metrics.py +34 -0
  172. helm/benchmark/model_metadata_registry.py +16 -0
  173. helm/benchmark/presentation/summarize.py +23 -10
  174. helm/benchmark/presentation/torr_robustness_summarizer.py +178 -0
  175. helm/benchmark/reeval_run.py +203 -0
  176. helm/benchmark/reeval_runner.py +355 -0
  177. helm/benchmark/run.py +8 -17
  178. helm/benchmark/run_expander.py +78 -8
  179. helm/benchmark/run_spec_factory.py +12 -0
  180. helm/benchmark/run_specs/air_bench_run_specs.py +21 -3
  181. helm/benchmark/run_specs/audio_run_specs.py +613 -0
  182. helm/benchmark/run_specs/call_center_run_specs.py +49 -0
  183. helm/benchmark/run_specs/capabilities_run_specs.py +308 -0
  184. helm/benchmark/run_specs/classic_run_specs.py +1 -69
  185. helm/benchmark/run_specs/enem_challenge_specs.py +31 -0
  186. helm/benchmark/run_specs/enterprise_run_specs.py +260 -0
  187. helm/benchmark/run_specs/experimental_run_specs.py +112 -3
  188. helm/benchmark/run_specs/imdb_ptbr_run_specs.py +30 -0
  189. helm/benchmark/run_specs/lite_run_specs.py +2 -2
  190. helm/benchmark/run_specs/long_context_run_specs.py +89 -0
  191. helm/benchmark/run_specs/medhelm_run_specs.py +1155 -0
  192. helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py +49 -0
  193. helm/benchmark/run_specs/oab_exams_specs.py +32 -0
  194. helm/benchmark/run_specs/safety_run_specs.py +37 -0
  195. helm/benchmark/run_specs/{bhasa_run_specs.py → seahelm_run_specs.py} +44 -44
  196. helm/benchmark/run_specs/sql_run_specs.py +54 -0
  197. helm/benchmark/run_specs/tweetsentbr_run_specs.py +32 -0
  198. helm/benchmark/run_specs/unitxt_run_specs.py +14 -5
  199. helm/benchmark/run_specs/vlm_run_specs.py +75 -2
  200. helm/benchmark/run_specs/winogrande_afr_run_specs.py +47 -0
  201. helm/benchmark/scenarios/aci_bench_scenario.py +120 -0
  202. helm/benchmark/scenarios/air_bench_scenario.py +6 -1
  203. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +5 -3
  204. helm/benchmark/scenarios/anthropic_red_team_scenario.py +1 -1
  205. helm/benchmark/scenarios/audio_language/__init__.py +0 -0
  206. helm/benchmark/scenarios/audio_language/air_bench_chat_scenario.py +128 -0
  207. helm/benchmark/scenarios/audio_language/air_bench_foundation_scenario.py +154 -0
  208. helm/benchmark/scenarios/audio_language/ami_scenario.py +96 -0
  209. helm/benchmark/scenarios/audio_language/audio_mnist_scenario.py +62 -0
  210. helm/benchmark/scenarios/audio_language/audio_pairs_scenario.py +62 -0
  211. helm/benchmark/scenarios/audio_language/audiocaps_scenario.py +59 -0
  212. helm/benchmark/scenarios/audio_language/casual_conversations2_scenario.py +152 -0
  213. helm/benchmark/scenarios/audio_language/common_voice_15_scenario.py +99 -0
  214. helm/benchmark/scenarios/audio_language/covost2_scenario.py +163 -0
  215. helm/benchmark/scenarios/audio_language/fleurs_fairness_scenario.py +83 -0
  216. helm/benchmark/scenarios/audio_language/fleurs_scenario.py +312 -0
  217. helm/benchmark/scenarios/audio_language/iemocap_audio_scenario.py +83 -0
  218. helm/benchmark/scenarios/audio_language/librispeech_fairness_scenario.py +96 -0
  219. helm/benchmark/scenarios/audio_language/librispeech_scenario.py +80 -0
  220. helm/benchmark/scenarios/audio_language/meld_audio_scenario.py +113 -0
  221. helm/benchmark/scenarios/audio_language/multilingual_librispeech_scenario.py +80 -0
  222. helm/benchmark/scenarios/audio_language/mustard_scenario.py +142 -0
  223. helm/benchmark/scenarios/audio_language/mutox_scenario.py +254 -0
  224. helm/benchmark/scenarios/audio_language/parade_scenario.py +97 -0
  225. helm/benchmark/scenarios/audio_language/speech_robust_bench_scenario.py +124 -0
  226. helm/benchmark/scenarios/audio_language/vocal_sound_scenario.py +69 -0
  227. helm/benchmark/scenarios/audio_language/voice_jailbreak_attacks_scenario.py +87 -0
  228. helm/benchmark/scenarios/audio_language/voxceleb2_scenario.py +106 -0
  229. helm/benchmark/scenarios/autobencher_capabilities_scenario.py +68 -0
  230. helm/benchmark/scenarios/autobencher_safety_scenario.py +51 -0
  231. helm/benchmark/scenarios/babi_qa_scenario.py +1 -1
  232. helm/benchmark/scenarios/banking77_scenario.py +6 -1
  233. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  234. helm/benchmark/scenarios/big_bench_scenario.py +11 -1
  235. helm/benchmark/scenarios/bigcodebench_scenario.py +58 -0
  236. helm/benchmark/scenarios/bird_sql_scenario.py +94 -0
  237. helm/benchmark/scenarios/bird_sql_scenario_helper.py +118 -0
  238. helm/benchmark/scenarios/blimp_scenario.py +1 -1
  239. helm/benchmark/scenarios/bold_scenario.py +1 -1
  240. helm/benchmark/scenarios/boolq_scenario.py +1 -1
  241. helm/benchmark/scenarios/casehold_scenario.py +79 -0
  242. helm/benchmark/scenarios/chw_care_plan_scenario.py +105 -0
  243. helm/benchmark/scenarios/civil_comments_scenario.py +1 -1
  244. helm/benchmark/scenarios/clear_scenario.py +153 -0
  245. helm/benchmark/scenarios/cleva_scenario.py +2 -2
  246. helm/benchmark/scenarios/code_scenario.py +17 -4
  247. helm/benchmark/scenarios/commonsense_scenario.py +1 -1
  248. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +97 -0
  249. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  250. helm/benchmark/scenarios/covid_dialog_scenario.py +10 -1
  251. helm/benchmark/scenarios/cti_to_mitre_scenario.py +240 -0
  252. helm/benchmark/scenarios/custom_mcqa_scenario.py +1 -1
  253. helm/benchmark/scenarios/czech_bank_qa_scenario.py +130 -0
  254. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +1 -1
  255. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +1 -1
  256. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +1 -1
  257. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +1 -1
  258. helm/benchmark/scenarios/dialogue_scenarios.py +13 -2
  259. helm/benchmark/scenarios/dischargeme_scenario.py +157 -0
  260. helm/benchmark/scenarios/disinformation_scenario.py +10 -1
  261. helm/benchmark/scenarios/dyck_language_scenario.py +10 -1
  262. helm/benchmark/scenarios/echr_judgment_classification_scenario.py +113 -0
  263. helm/benchmark/scenarios/ehr_sql_scenario.py +131 -0
  264. helm/benchmark/scenarios/ehrshot_scenario.py +1546 -0
  265. helm/benchmark/scenarios/enem_challenge_scenario.py +58 -0
  266. helm/benchmark/scenarios/entity_data_imputation_scenario.py +11 -1
  267. helm/benchmark/scenarios/entity_matching_scenario.py +12 -2
  268. helm/benchmark/scenarios/financial_phrasebank_scenario.py +94 -0
  269. helm/benchmark/scenarios/gold_commodity_news_scenario.py +124 -0
  270. helm/benchmark/scenarios/gpqa_scenario.py +80 -0
  271. helm/benchmark/scenarios/grammar_scenario.py +2 -2
  272. helm/benchmark/scenarios/gsm_scenario.py +10 -1
  273. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +50 -0
  274. helm/benchmark/scenarios/harm_bench_scenario.py +1 -1
  275. helm/benchmark/scenarios/headqa_scenario.py +131 -0
  276. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +37 -0
  277. helm/benchmark/scenarios/ice_scenario.py +8 -4
  278. helm/benchmark/scenarios/ifeval_scenario.py +53 -0
  279. helm/benchmark/scenarios/imdb_ptbr_scenario.py +60 -0
  280. helm/benchmark/scenarios/imdb_scenario.py +11 -2
  281. helm/benchmark/scenarios/infinite_bench_sum_scenario.py +82 -0
  282. helm/benchmark/scenarios/interactive_qa_mmlu_scenario.py +2 -2
  283. helm/benchmark/scenarios/koala_scenario.py +1 -1
  284. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +129 -0
  285. helm/benchmark/scenarios/legal_opinion_sentiment_classification_scenario.py +77 -0
  286. helm/benchmark/scenarios/legal_summarization_scenario.py +11 -1
  287. helm/benchmark/scenarios/legal_support_scenario.py +11 -1
  288. helm/benchmark/scenarios/legalbench_scenario.py +22 -3
  289. helm/benchmark/scenarios/lex_glue_scenario.py +12 -2
  290. helm/benchmark/scenarios/lextreme_scenario.py +11 -1
  291. helm/benchmark/scenarios/live_qa_scenario.py +1 -1
  292. helm/benchmark/scenarios/lm_entry_scenario.py +1 -1
  293. helm/benchmark/scenarios/lsat_qa_scenario.py +1 -1
  294. helm/benchmark/scenarios/math_scenario.py +9 -1
  295. helm/benchmark/scenarios/me_q_sum_scenario.py +10 -1
  296. helm/benchmark/scenarios/med_dialog_scenario.py +22 -24
  297. helm/benchmark/scenarios/med_mcqa_scenario.py +10 -1
  298. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +10 -1
  299. helm/benchmark/scenarios/med_qa_scenario.py +10 -1
  300. helm/benchmark/scenarios/medalign_scenario.py +88 -0
  301. helm/benchmark/scenarios/medalign_scenario_helper.py +429 -0
  302. helm/benchmark/scenarios/medbullets_scenario.py +140 -0
  303. helm/benchmark/scenarios/medcalc_bench_scenario.py +125 -0
  304. helm/benchmark/scenarios/medec_scenario.py +120 -0
  305. helm/benchmark/scenarios/medhallu_scenario.py +66 -0
  306. helm/benchmark/scenarios/medi_qa_scenario.py +105 -0
  307. helm/benchmark/scenarios/medication_qa_scenario.py +2 -2
  308. helm/benchmark/scenarios/mental_health_scenario.py +112 -0
  309. helm/benchmark/scenarios/mimic_bhc_scenario.py +98 -0
  310. helm/benchmark/scenarios/mimic_rrs_scenario.py +89 -0
  311. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +71 -0
  312. helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py +74 -0
  313. helm/benchmark/scenarios/mmlu_pro_scenario.py +95 -0
  314. helm/benchmark/scenarios/mmlu_scenario.py +11 -1
  315. helm/benchmark/scenarios/msmarco_scenario.py +1 -1
  316. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +141 -0
  317. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +141 -0
  318. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +271 -0
  319. helm/benchmark/scenarios/narrativeqa_scenario.py +1 -1
  320. helm/benchmark/scenarios/natural_qa_scenario.py +1 -1
  321. helm/benchmark/scenarios/newsqa_scenario.py +1 -1
  322. helm/benchmark/scenarios/numeracy_scenario.py +10 -1
  323. helm/benchmark/scenarios/oab_exams_scenario.py +57 -0
  324. helm/benchmark/scenarios/omni_math_scenario.py +53 -0
  325. helm/benchmark/scenarios/open_assistant_scenario.py +11 -2
  326. helm/benchmark/scenarios/opinions_qa_scenario.py +1 -1
  327. helm/benchmark/scenarios/pubmed_qa_scenario.py +54 -43
  328. helm/benchmark/scenarios/quac_scenario.py +10 -1
  329. helm/benchmark/scenarios/race_based_med_scenario.py +142 -0
  330. helm/benchmark/scenarios/raft_scenario.py +17 -2
  331. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +1 -1
  332. helm/benchmark/scenarios/ruler_qa_scenario_helper.py +171 -0
  333. helm/benchmark/scenarios/ruler_qa_scenarios.py +88 -0
  334. helm/benchmark/scenarios/scenario.py +9 -1
  335. helm/benchmark/scenarios/{bhasa_scenario.py → seahelm_scenario.py} +7 -2
  336. helm/benchmark/scenarios/self_instruct_scenario.py +1 -1
  337. helm/benchmark/scenarios/shc_bmt_scenario.py +69 -0
  338. helm/benchmark/scenarios/shc_cdi_scenario.py +70 -0
  339. helm/benchmark/scenarios/shc_conf_scenario.py +70 -0
  340. helm/benchmark/scenarios/shc_ent_scenario.py +72 -0
  341. helm/benchmark/scenarios/shc_gip_scenario.py +66 -0
  342. helm/benchmark/scenarios/shc_ptbm_scenario.py +76 -0
  343. helm/benchmark/scenarios/shc_sei_scenario.py +89 -0
  344. helm/benchmark/scenarios/shc_sequoia_scenario.py +69 -0
  345. helm/benchmark/scenarios/simple_safety_tests_scenario.py +1 -1
  346. helm/benchmark/scenarios/spider_scenario.py +91 -0
  347. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +90 -0
  348. helm/benchmark/scenarios/summarization_scenario.py +11 -1
  349. helm/benchmark/scenarios/sumosum_scenario.py +157 -0
  350. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +1 -1
  351. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +11 -1
  352. helm/benchmark/scenarios/synthetic_reasoning_scenario.py +11 -1
  353. helm/benchmark/scenarios/test_bigcodebench_scenario.py +26 -0
  354. helm/benchmark/scenarios/test_czech_bank_qa_scenario.py +18 -0
  355. helm/benchmark/scenarios/test_enem_challenge_scenario.py +53 -0
  356. helm/benchmark/scenarios/test_ewok_scenario.py +6 -2
  357. helm/benchmark/scenarios/test_gold_commodity_news_scenario.py +18 -0
  358. helm/benchmark/scenarios/test_gpqa_scenario.py +44 -0
  359. helm/benchmark/scenarios/test_ifeval_scenario.py +36 -0
  360. helm/benchmark/scenarios/test_imdb_ptbr_scenario.py +27 -0
  361. helm/benchmark/scenarios/test_infinite_bench_sum_scenario.py +46 -0
  362. helm/benchmark/scenarios/test_math_scenario.py +1 -0
  363. helm/benchmark/scenarios/test_mmlu_clinical_afr_scenario.py +21 -0
  364. helm/benchmark/scenarios/test_mmlu_pro_scenario.py +53 -0
  365. helm/benchmark/scenarios/test_oab_exams_scenario.py +51 -0
  366. helm/benchmark/scenarios/test_omni_math_scenario.py +27 -0
  367. helm/benchmark/scenarios/test_tweetsentbr_scenario.py +24 -0
  368. helm/benchmark/scenarios/test_wildbench_scenario.py +15 -0
  369. helm/benchmark/scenarios/test_winogrande_afr_scenario.py +19 -0
  370. helm/benchmark/scenarios/thai_exam_scenario.py +10 -1
  371. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  372. helm/benchmark/scenarios/truthful_qa_scenario.py +10 -1
  373. helm/benchmark/scenarios/tweetsentbr_scenario.py +66 -0
  374. helm/benchmark/scenarios/twitter_aae_scenario.py +1 -1
  375. helm/benchmark/scenarios/unitxt_scenario.py +8 -2
  376. helm/benchmark/scenarios/verifiability_judgment_scenario.py +1 -1
  377. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  378. helm/benchmark/scenarios/vision_language/blink_scenario.py +140 -0
  379. helm/benchmark/scenarios/vision_language/mm_star_scenario.py +95 -0
  380. helm/benchmark/scenarios/vision_language/vqa_rad_scenario.py +88 -0
  381. helm/benchmark/scenarios/wikifact_scenario.py +11 -1
  382. helm/benchmark/scenarios/wikitext_103_scenario.py +1 -1
  383. helm/benchmark/scenarios/wildbench_scenario.py +83 -0
  384. helm/benchmark/scenarios/winogrande_afr_scenario.py +78 -0
  385. helm/benchmark/scenarios/wmt_14_scenario.py +14 -2
  386. helm/benchmark/scenarios/xstest_scenario.py +1 -1
  387. helm/benchmark/server.py +11 -0
  388. helm/benchmark/slurm_runner.py +1 -1
  389. helm/benchmark/static/schema_audio.yaml +752 -0
  390. helm/benchmark/static/schema_autobencher.yaml +150 -0
  391. helm/benchmark/static/schema_call_center.yaml +97 -60
  392. helm/benchmark/static/schema_capabilities.yaml +254 -0
  393. helm/benchmark/static/schema_czech_bank.yaml +148 -0
  394. helm/benchmark/static/schema_enem_challenge.yaml +146 -0
  395. helm/benchmark/static/schema_enterprise.yaml +298 -0
  396. helm/benchmark/static/schema_finance.yaml +14 -12
  397. helm/benchmark/static/schema_heim.yaml +1389 -0
  398. helm/benchmark/static/{schema_medical.yaml → schema_long_context.yaml} +67 -82
  399. helm/benchmark/static/schema_medhelm.yaml +1081 -0
  400. helm/benchmark/static/schema_mmlu_winogrande_afr.yaml +1045 -0
  401. helm/benchmark/static/schema_safety.yaml +18 -1
  402. helm/benchmark/static/{schema_bhasa.yaml → schema_seahelm.yaml} +30 -16
  403. helm/benchmark/static/schema_social_audio.yaml +224 -0
  404. helm/benchmark/static/schema_sql.yaml +171 -0
  405. helm/benchmark/static/{schema_tables.yaml → schema_torr.yaml} +169 -36
  406. helm/benchmark/static/schema_tweetsentbr.yaml +146 -0
  407. helm/benchmark/static/schema_vhelm.yaml +109 -36
  408. helm/benchmark/static_build/assets/helm-safety-2907a7b6.png +0 -0
  409. helm/benchmark/static_build/assets/index-262903c1.js +10 -0
  410. helm/benchmark/static_build/assets/index-42060d71.css +1 -0
  411. helm/benchmark/static_build/assets/medhelm-overview-3ddfcd65.png +0 -0
  412. helm/benchmark/static_build/assets/{react-d4a0b69b.js → react-f82877fd.js} +1 -1
  413. helm/benchmark/static_build/assets/{recharts-6d337683.js → recharts-4037aff0.js} +1 -1
  414. helm/benchmark/static_build/assets/{tremor-54a99cc4.js → tremor-9cefc3c5.js} +1 -1
  415. helm/benchmark/static_build/config.js +1 -1
  416. helm/benchmark/static_build/index.html +5 -5
  417. helm/benchmark/window_services/default_window_service.py +1 -1
  418. helm/benchmark/window_services/encoder_decoder_window_service.py +1 -1
  419. helm/benchmark/window_services/ice_window_service.py +1 -1
  420. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +1 -1
  421. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +1 -1
  422. helm/benchmark/window_services/local_window_service.py +2 -2
  423. helm/benchmark/window_services/test_anthropic_window_service.py +3 -3
  424. helm/benchmark/window_services/test_bloom_window_service.py +3 -3
  425. helm/benchmark/window_services/test_gpt2_window_service.py +7 -2
  426. helm/benchmark/window_services/test_gpt4_window_service.py +8 -3
  427. helm/benchmark/window_services/test_gptj_window_service.py +8 -3
  428. helm/benchmark/window_services/test_gptneox_window_service.py +3 -3
  429. helm/benchmark/window_services/test_openai_window_service.py +8 -3
  430. helm/benchmark/window_services/test_opt_window_service.py +3 -3
  431. helm/benchmark/window_services/test_palmyra_window_service.py +3 -3
  432. helm/benchmark/window_services/test_t0pp_window_service.py +3 -3
  433. helm/benchmark/window_services/test_t511b_window_service.py +3 -3
  434. helm/benchmark/window_services/test_ul2_window_service.py +3 -3
  435. helm/benchmark/window_services/test_utils.py +1 -1
  436. helm/benchmark/window_services/test_yalm_window_service.py +3 -3
  437. helm/benchmark/window_services/yalm_window_service.py +1 -1
  438. helm/clients/ai21_client.py +3 -3
  439. helm/clients/aleph_alpha_client.py +1 -1
  440. helm/clients/audio_language/__init__.py +0 -0
  441. helm/clients/audio_language/diva_llama_client.py +118 -0
  442. helm/clients/audio_language/llama_omni_client.py +198 -0
  443. helm/clients/audio_language/qwen2_audiolm_client.py +188 -0
  444. helm/clients/audio_language/qwen_audiolm_client.py +150 -0
  445. helm/clients/auto_client.py +4 -2
  446. helm/clients/azure_openai_client.py +55 -0
  447. helm/clients/bedrock_client.py +201 -7
  448. helm/clients/bedrock_utils.py +33 -0
  449. helm/clients/clip_scorers/clip_scorer.py +1 -1
  450. helm/clients/clip_scorers/multilingual_clip_scorer.py +1 -1
  451. helm/clients/cohere_client.py +3 -3
  452. helm/clients/google_client.py +1 -1
  453. helm/clients/http_model_client.py +1 -1
  454. helm/clients/huggingface_client.py +10 -18
  455. helm/clients/ibm_client.py +267 -0
  456. helm/clients/image_generation/adobe_vision_client.py +1 -1
  457. helm/clients/image_generation/aleph_alpha_image_generation_client.py +1 -1
  458. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +3 -3
  459. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +5 -2
  460. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +5 -2
  461. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +2 -2
  462. helm/clients/image_generation/cogview2_client.py +1 -1
  463. helm/clients/image_generation/dalle2_client.py +1 -1
  464. helm/clients/image_generation/dalle3_client.py +2 -2
  465. helm/clients/image_generation/dalle_mini/__init__.py +1 -1
  466. helm/clients/image_generation/dalle_mini/data.py +1 -1
  467. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -5
  468. helm/clients/image_generation/dalle_mini/model/configuration.py +1 -1
  469. helm/clients/image_generation/dalle_mini/model/modeling.py +2 -2
  470. helm/clients/image_generation/dalle_mini/model/processor.py +4 -4
  471. helm/clients/image_generation/dalle_mini/model/tokenizer.py +1 -1
  472. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -1
  473. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +2 -2
  474. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +1 -1
  475. helm/clients/image_generation/dalle_mini_client.py +1 -1
  476. helm/clients/image_generation/deep_floyd_client.py +1 -1
  477. helm/clients/image_generation/huggingface_diffusers_client.py +1 -1
  478. helm/clients/image_generation/lexica_client.py +1 -1
  479. helm/clients/image_generation/mindalle/models/__init__.py +6 -6
  480. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +1 -1
  481. helm/clients/image_generation/mindalle/models/stage2/transformer.py +1 -1
  482. helm/clients/image_generation/mindalle/utils/__init__.py +3 -3
  483. helm/clients/image_generation/mindalle_client.py +1 -1
  484. helm/clients/image_generation/together_image_generation_client.py +1 -1
  485. helm/clients/lit_gpt_client.py +2 -2
  486. helm/clients/mistral_client.py +62 -18
  487. helm/clients/nvidia_nim_client.py +0 -3
  488. helm/clients/openai_client.py +241 -22
  489. helm/clients/palmyra_client.py +1 -4
  490. helm/clients/reka_client.py +1 -1
  491. helm/clients/stanfordhealthcare_azure_openai_client.py +58 -0
  492. helm/clients/stanfordhealthcare_claude_client.py +31 -0
  493. helm/clients/stanfordhealthcare_google_client.py +43 -0
  494. helm/clients/stanfordhealthcare_http_model_client.py +93 -0
  495. helm/clients/stanfordhealthcare_openai_client.py +62 -0
  496. helm/clients/stanfordhealthcare_shc_openai_client.py +42 -0
  497. helm/clients/test_client.py +1 -1
  498. helm/clients/test_together_client.py +6 -1
  499. helm/clients/together_client.py +47 -7
  500. helm/clients/upstage_client.py +23 -0
  501. helm/clients/vertexai_client.py +39 -13
  502. helm/clients/vision_language/open_flamingo/__init__.py +2 -2
  503. helm/clients/vision_language/open_flamingo/src/factory.py +3 -3
  504. helm/clients/vision_language/open_flamingo/src/flamingo.py +2 -2
  505. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +2 -2
  506. helm/clients/vision_language/qwen2_vlm_client.py +175 -0
  507. helm/clients/vllm_client.py +4 -6
  508. helm/clients/yi_client.py +0 -3
  509. helm/common/audio_utils.py +111 -0
  510. helm/common/file_caches/local_file_cache.py +1 -1
  511. helm/common/file_caches/test_local_file_cache.py +1 -1
  512. helm/common/images_utils.py +2 -2
  513. helm/common/media_object.py +2 -2
  514. helm/common/multimodal_request_utils.py +26 -0
  515. helm/common/reeval_parameters.py +12 -0
  516. helm/common/request.py +6 -2
  517. helm/common/response_format.py +18 -0
  518. helm/common/test_media_object.py +1 -1
  519. helm/config/model_deployments.yaml +1112 -19
  520. helm/config/model_metadata.yaml +985 -44
  521. helm/config/tokenizer_configs.yaml +379 -3
  522. helm/proxy/cli.py +2 -2
  523. helm/proxy/example_queries.py +1 -1
  524. helm/proxy/server.py +11 -4
  525. helm/proxy/services/remote_service.py +1 -1
  526. helm/proxy/services/server_service.py +1 -1
  527. helm/proxy/services/test_remote_service.py +2 -2
  528. helm/proxy/services/test_service.py +1 -1
  529. helm/proxy/static/general.js +122 -0
  530. helm/proxy/static/help.html +99 -0
  531. helm/proxy/static/index.css +57 -0
  532. helm/proxy/static/index.html +40 -0
  533. helm/proxy/static/index.js +456 -0
  534. helm/proxy/static/info-icon.png +0 -0
  535. helm/proxy/test_retry.py +1 -1
  536. helm/proxy/token_counters/auto_token_counter.py +1 -1
  537. helm/tokenizers/aleph_alpha_tokenizer.py +1 -1
  538. helm/tokenizers/caching_tokenizer.py +2 -30
  539. helm/tokenizers/http_model_tokenizer.py +1 -1
  540. helm/tokenizers/huggingface_tokenizer.py +2 -2
  541. helm/tokenizers/lit_gpt_tokenizer.py +1 -1
  542. helm/tokenizers/test_anthropic_tokenizer.py +6 -2
  543. helm/tokenizers/test_huggingface_tokenizer.py +1 -1
  544. helm/tokenizers/test_yalm_tokenizer.py +1 -1
  545. helm/tokenizers/tiktoken_tokenizer.py +1 -1
  546. helm/tokenizers/tokenizer.py +3 -1
  547. helm/tokenizers/yalm_tokenizer.py +3 -3
  548. helm/tokenizers/yalm_tokenizer_data/test_yalm_tokenizer.py +1 -1
  549. crfm_helm-0.5.4.dist-info/METADATA +0 -350
  550. crfm_helm-0.5.4.dist-info/RECORD +0 -697
  551. helm/benchmark/metrics/bhasa_metrics_specs.py +0 -10
  552. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  553. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  554. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  555. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  556. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  557. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  558. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  559. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  560. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  561. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  562. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  563. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  564. helm/benchmark/static_build/assets/index-05c76bb1.css +0 -1
  565. helm/benchmark/static_build/assets/index-3ee38b3d.js +0 -10
  566. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  567. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  568. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  569. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  570. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  571. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  572. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  573. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  574. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  575. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  576. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  577. helm/tokenizers/anthropic_tokenizer.py +0 -52
  578. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/entry_points.txt +0 -0
  579. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info/licenses}/LICENSE +0 -0
  580. {crfm_helm-0.5.4.dist-info → crfm_helm-0.5.5.dist-info}/top_level.txt +0 -0
@@ -18,7 +18,7 @@ models:
18
18
  access: open
19
19
  release_date: 2023-01-01
20
20
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
21
-
21
+
22
22
  # Adobe
23
23
  - name: adobe/giga-gan
24
24
  display_name: GigaGAN (1B)
@@ -128,7 +128,7 @@ models:
128
128
 
129
129
  # AI Singapore
130
130
  - name: aisingapore/sea-lion-7b
131
- display_name: SEA-LION (7B)
131
+ display_name: SEA-LION 7B
132
132
  description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
133
133
  creator_organization_name: AI Singapore
134
134
  access: open
@@ -137,7 +137,7 @@ models:
137
137
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
138
138
 
139
139
  - name: aisingapore/sea-lion-7b-instruct
140
- display_name: SEA-LION Instruct (7B)
140
+ display_name: SEA-LION 7B Instruct
141
141
  description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
142
142
  creator_organization_name: AI Singapore
143
143
  access: open
@@ -146,23 +146,77 @@ models:
146
146
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
147
147
 
148
148
  - name: aisingapore/llama3-8b-cpt-sea-lionv2-base
149
- display_name: Llama 3 CPT SEA-Lion v2 (8B)
150
- description: Llama 3 CPT SEA-Lion v2 (8B) is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
149
+ display_name: Llama3 8B CPT SEA-LIONv2
150
+ description: Llama3 8B CPT SEA-LIONv2 is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
151
151
  creator_organization_name: AI Singapore
152
152
  access: open
153
- num_parameters: 80300000000
153
+ num_parameters: 8030000000
154
154
  release_date: 2024-07-31
155
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
155
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
156
156
 
157
157
  - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
158
- display_name: Llama 3 CPT SEA-Lion v2.1 Instruct (8B)
159
- description: Llama 3 CPT SEA-Lion v2.1 Instruct (8B) is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
158
+ display_name: Llama3 8B CPT SEA-LIONv2.1 Instruct
159
+ description: Llama3 8B CPT SEA-LIONv2.1 Instruct is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
160
160
  creator_organization_name: AI Singapore
161
161
  access: open
162
- num_parameters: 80300000000
162
+ num_parameters: 8030000000
163
163
  release_date: 2024-08-21
164
164
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
165
165
 
166
+ - name: aisingapore/gemma2-9b-cpt-sea-lionv3-base
167
+ display_name: Gemma2 9B CPT SEA-LIONv3
168
+ description: Gemma2 9B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across the 11 official Southeast Asian languages, such as English, Chinese, Vietnamese, Indonesian, Thai, Tamil, Filipino, Malay, Khmer, Lao, Burmese.
169
+ creator_organization_name: AI Singapore
170
+ access: open
171
+ num_parameters: 9240000000
172
+ release_date: 2024-10-30
173
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
174
+
175
+ - name: aisingapore/gemma2-9b-cpt-sea-lionv3-instruct
176
+ display_name: Gemma2 9B CPT SEA-LIONv3 Instruct
177
+ description: Gemma2 9B CPT SEA-LIONv3 Instruct is a multilingual model which has been fine-tuned with around 500,000 English instruction-completion pairs alongside a larger pool of around 1,000,000 instruction-completion pairs from other ASEAN languages, such as Indonesian, Thai and Vietnamese.
178
+ creator_organization_name: AI Singapore
179
+ access: open
180
+ num_parameters: 9240000000
181
+ release_date: 2024-10-30
182
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
183
+
184
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-base
185
+ display_name: Llama3.1 8B CPT SEA-LIONv3
186
+ description: Llama3.1 8B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesia, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
187
+ creator_organization_name: AI Singapore
188
+ access: open
189
+ num_parameters: 9240000000
190
+ release_date: 2024-12-11
191
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
192
+
193
+ - name: aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct
194
+ display_name: Llama3.1 8B CPT SEA-LIONv3 Instruct
195
+ description: Llama3.1 8B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai and Vietnamese.
196
+ creator_organization_name: AI Singapore
197
+ access: open
198
+ num_parameters: 9240000000
199
+ release_date: 2024-12-11
200
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
201
+
202
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-base
203
+ display_name: Llama3.1 70B CPT SEA-LIONv3
204
+ description: Llama3.1 70B CPT SEA-LIONv3 Base is a multilingual model which has undergone continued pre-training on approximately 200B tokens across 11 SEA languages, such as Burmese, Chinese, English, Filipino, Indonesia, Khmer, Lao, Malay, Tamil, Thai and Vietnamese.
205
+ creator_organization_name: AI Singapore
206
+ access: open
207
+ num_parameters: 70600000000
208
+ release_date: 2024-12-11
209
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
210
+
211
+ - name: aisingapore/llama3.1-70b-cpt-sea-lionv3-instruct
212
+ display_name: Llama3.1 70B CPT SEA-LIONv3 Instruct
213
+ description: Llama3.1 70B CPT SEA-LIONv3 Instruct is a multilingual model that has been fine-tuned in two stages on approximately 12.3M English instruction-completion pairs alongside a pool of 4.5M Southeast Asian instruction-completion pairs from SEA languages such as Indonesian, Javanese, Sundanese, Tamil, Thai, and Vietnamese.
214
+ creator_organization_name: AI Singapore
215
+ access: open
216
+ num_parameters: 70600000000
217
+ release_date: 2024-12-11
218
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
219
+
166
220
  # Aleph Alpha
167
221
  # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
168
222
  # TODO: add Luminous World when it's released
@@ -219,7 +273,34 @@ models:
219
273
  tags: [TEXT_TO_IMAGE_MODEL_TAG]
220
274
 
221
275
 
222
- # Amazon
276
+ # Amazon Nova models
277
+ # References for Amazon Nova models:
278
+ # https://aws.amazon.com/ai/generative-ai/nova/
279
+ - name: amazon/nova-pro-v1:0
280
+ display_name: Amazon Nova Pro
281
+ description: Amazon Nova Pro Model
282
+ creator_organization_name: Amazon
283
+ access: limited
284
+ release_date: 2024-12-03
285
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
286
+
287
+ - name: amazon/nova-lite-v1:0
288
+ display_name: Amazon Nova Lite
289
+ description: Amazon Nova Lite Model
290
+ creator_organization_name: Amazon
291
+ access: limited
292
+ release_date: 2024-12-03
293
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
294
+
295
+ - name: amazon/nova-micro-v1:0
296
+ display_name: Amazon Nova Micro
297
+ description: Amazon Nova Micro Model
298
+ creator_organization_name: Amazon
299
+ access: limited
300
+ release_date: 2024-12-03
301
+ tags: [NOVA_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
302
+
303
+ # Titan Models
223
304
  # References for Amazon Titan models:
224
305
  # - https://aws.amazon.com/bedrock/titan/
225
306
  # - https://community.aws/content/2ZUVD3fkNtqEOYIa2iUJAFArS7c/family-of-titan-text-models---cli-demo
@@ -230,16 +311,8 @@ models:
230
311
  creator_organization_name: Amazon
231
312
  access: limited
232
313
  release_date: 2023-11-29
233
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
234
-
235
- - name: amazon/titan-tg1-large
236
- display_name: Amazon Titan Large
237
- description: Amazon Titan Large is efficient model perfect for fine-tuning English-language tasks like summarization, create article, marketing campaign.
238
- creator_organization_name: Amazon
239
- access: limited
240
- release_date: 2023-11-29
241
- tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
242
-
314
+ tags: [BEDROCK_MODEL_TAG,TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
315
+
243
316
  - name: amazon/titan-text-express-v1
244
317
  display_name: Amazon Titan Text Express
245
318
  description: Amazon Titan Text Express, with a context length of up to 8,000 tokens, excels in advanced language tasks like open-ended text generation and conversational chat. It's also optimized for Retrieval Augmented Generation (RAG). Initially designed for English, the model offers preview multilingual support for over 100 additional languages.
@@ -248,6 +321,93 @@ models:
248
321
  release_date: 2023-11-29
249
322
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
250
323
 
324
+ # Mistral Models on Bedrock
325
+ # References for Mistral on Amazon Bedrock
326
+ # https://aws.amazon.com/bedrock/mistral/
327
+
328
+ - name: mistralai/amazon-mistral-7b-instruct-v0:2
329
+ display_name: Mistral 7B Instruct on Amazon Bedrock
330
+ description: A 7B dense Transformer, fast-deployed and easily customisable. Small, yet powerful for a variety of use cases. Supports English and code, and a 32k context window.
331
+ creator_organization_name: Mistral
332
+ access: limited
333
+ release_date: 2024-03-23
334
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
335
+
336
+ - name: mistralai/amazon-mixtral-8x7b-instruct-v0:1
337
+ display_name: Mixtral 8x7B Instruct on Amazon Bedrock
338
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
339
+ creator_organization_name: Mistral
340
+ access: limited
341
+ release_date: 2023-12-11
342
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
343
+
344
+ - name: mistralai/amazon-mistral-large-2402-v1:0
345
+ display_name: Mistral Large(2402) on Amazon Bedrock
346
+ description: The most advanced Mistral AI Large Language model capable of handling any language task including complex multilingual reasoning, text understanding, transformation, and code generation.
347
+ creator_organization_name: Mistral
348
+ access: limited
349
+ release_date: 2023-07-26
350
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
351
+
352
+ - name: mistralai/amazon-mistral-small-2402-v1:0
353
+ display_name: Mistral Small on Amazon Bedrock
354
+ description: Mistral Small is perfectly suited for straightforward tasks that can be performed in bulk, such as classification, customer support, or text generation. It provides outstanding performance at a cost-effective price point.
355
+ creator_organization_name: Mistral
356
+ access: limited
357
+ release_date: 2023-02-26
358
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
359
+
360
+ - name: mistralai/amazon-mistral-large-2407-v1:0
361
+ display_name: Mistral Large(2407) on Amazon Bedrock
362
+ description: Mistral Large 2407 is an advanced Large Language Model (LLM) that supports dozens of languages and is trained on 80+ coding languages. It has best-in-class agentic capabilities with native function calling JSON outputting and reasoning capabilities.
363
+ creator_organization_name: Mistral
364
+ access: limited
365
+ release_date: 2024-07-24
366
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
367
+
368
+ # Llama3 on Amazon Bedrock
369
+ # References for Llama3 on Amazon Bedrock
370
+ # https://aws.amazon.com/bedrock/llama/
371
+
372
+ - name: meta/amazon-llama3-8b-instruct-v1:0
373
+ display_name: Llama 3 8B Instruct on Amazon Bedrock
374
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for limited computational power and resources, edge devices, and faster training times.
375
+ creator_organization_name: Meta
376
+ access: limited
377
+ release_date: 2024-04-23
378
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
379
+
380
+ - name: meta/amazon-llama3-70b-instruct-v1:0
381
+ display_name: Llama 3 70B Instruct on Amazon Bedrock
382
+ description: Meta Llama 3 is an accessible, open large language model (LLM) designed for developers, researchers, and businesses to build, experiment, and responsibly scale their generative AI ideas. Part of a foundational system, it serves as a bedrock for innovation in the global community. Ideal for content creation, conversational AI, language understanding, R&D, and Enterprise applications.
383
+ creator_organization_name: Meta
384
+ access: limited
385
+ release_date: 2024-04-23
386
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
387
+
388
+ - name: meta/amazon-llama3-1-405b-instruct-v1:0
389
+ display_name: Llama 3.1 405b Instruct on Amazon Bedrock.
390
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
391
+ creator_organization_name: Meta
392
+ access: limited
393
+ release_date: 2024-07-26
394
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
395
+
396
+ - name: meta/amazon-llama3-1-70b-instruct-v1:0
397
+ display_name: Llama 3.1 70b Instruct on Amazon Bedrock.
398
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
399
+ creator_organization_name: Meta
400
+ access: limited
401
+ release_date: 2024-07-26
402
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
403
+
404
+ - name: meta/amazon-llama3-1-8b-instruct-v1:0
405
+ display_name: Llama 3.1 8b Instruct on Amazon Bedrock.
406
+ description: Meta's Llama 3.1 offers multilingual models (8B, 70B, 405B) with 128K context, improved reasoning, and optimization for dialogue. It outperforms many open-source chat models and is designed for commercial and research use in multiple languages.
407
+ creator_organization_name: Meta
408
+ access: limited
409
+ release_date: 2024-07-26
410
+ tags: [BEDROCK_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
251
411
 
252
412
  # Anthropic
253
413
  - name: anthropic/claude-v1.3
@@ -315,6 +475,14 @@ models:
315
475
  release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
316
476
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
317
477
 
478
+ - name: anthropic/claude-3-5-haiku-20241022
479
+ display_name: Claude 3.5 Haiku (20241022)
480
+ description: Claude 3.5 Haiku is a Claude 3 family model which matches the performance of Claude 3 Opus at a similar speed to the previous generation of Haiku ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
481
+ creator_organization_name: Anthropic
482
+ access: limited
483
+ release_date: 2024-11-04 # Released after the blog post
484
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
485
+
318
486
  - name: anthropic/claude-3-5-sonnet-20240620
319
487
  display_name: Claude 3.5 Sonnet (20240620)
320
488
  description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost. ([blog](https://www.anthropic.com/news/claude-3-5-sonnet))
@@ -323,6 +491,22 @@ models:
323
491
  release_date: 2024-06-20
324
492
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
325
493
 
494
+ - name: anthropic/claude-3-5-sonnet-20241022
495
+ display_name: Claude 3.5 Sonnet (20241022)
496
+ description: Claude 3.5 Sonnet is a Claude 3 family model which outperforms Claude 3 Opus while operating faster and at a lower cost ([blog](https://www.anthropic.com/news/claude-3-5-sonnet)). This is an upgraded snapshot released on 2024-10-22 ([blog](https://www.anthropic.com/news/3-5-models-and-computer-use)).
497
+ creator_organization_name: Anthropic
498
+ access: limited
499
+ release_date: 2024-10-22
500
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
501
+
502
+ - name: anthropic/claude-3-7-sonnet-20250219
503
+ display_name: Claude 3.7 Sonnet (20250219)
504
+ description: Claude 3.7 Sonnet is a Claude 3 family hybrid reasoning model that can produce near-instant responses or extended, step-by-step thinking that is made visible to the user ([blog](https://www.anthropic.com/news/claude-3-7-sonnet)).
505
+ creator_organization_name: Anthropic
506
+ access: limited
507
+ release_date: 2025-02-24
508
+ tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
509
+
326
510
  - name: anthropic/stanford-online-all-v4-s3
327
511
  display_name: Anthropic-LM v4-s3 (52B)
328
512
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
@@ -342,7 +526,7 @@ models:
342
526
  access: open
343
527
  num_parameters: 13000000000
344
528
  release_date: 2022-04-03
345
- tags: [] # TODO: add tags
529
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
346
530
 
347
531
 
348
532
 
@@ -363,7 +547,7 @@ models:
363
547
  access: open
364
548
  num_parameters: 176000000000
365
549
  release_date: 2022-11-03
366
- tags: [] # TODO: add tags
550
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
367
551
 
368
552
  - name: bigscience/t0pp
369
553
  display_name: T0pp (11B)
@@ -418,7 +602,7 @@ models:
418
602
  access: limited
419
603
  num_parameters: 6700000000
420
604
  release_date: 2023-04-06
421
- tags: [] # TODO: add tags
605
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
422
606
 
423
607
  - name: cerebras/cerebras-gpt-13b # NOT SUPPORTED
424
608
  display_name: Cerebras GPT (13B)
@@ -427,7 +611,7 @@ models:
427
611
  access: limited
428
612
  num_parameters: 13000000000
429
613
  release_date: 2023-04-06
430
- tags: [] # TODO: add tags
614
+ tags: [DEPRECATED_MODEL_TAG] # TODO: add tags
431
615
 
432
616
 
433
617
 
@@ -644,7 +828,7 @@ models:
644
828
  access: closed
645
829
  num_parameters: 280000000000
646
830
  release_date: 2021-12-08
647
- tags: [] # TODO: add tags
831
+ tags: [UNSUPPORTED_MODEL_TAG]
648
832
 
649
833
  - name: deepmind/chinchilla # NOT SUPPORTED
650
834
  display_name: Chinchilla (70B)
@@ -653,7 +837,7 @@ models:
653
837
  access: closed
654
838
  num_parameters: 70000000000
655
839
  release_date: 2022-03-31
656
- tags: [] # TODO: add tags
840
+ tags: [UNSUPPORTED_MODEL_TAG]
657
841
 
658
842
 
659
843
  # Deepseek
@@ -666,7 +850,36 @@ models:
666
850
  release_date: 2024-01-05
667
851
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
668
852
 
853
+ - name: deepseek-ai/deepseek-v3
854
+ display_name: DeepSeek v3
855
+ description: DeepSeek v3 a Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. It adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures. ([paper](https://github.com/deepseek-ai/DeepSeek-V3/blob/main/DeepSeek_V3.pdf))
856
+ creator_organization_name: DeepSeek
857
+ access: open
858
+ # NOTE: The total size of DeepSeek-V3 models on HuggingFace is 685B, which includes 671B of the Main Model weights and 14B of the Multi-Token Prediction (MTP) Module weights.
859
+ num_parameters: 685000000000
860
+ release_date: 2024-12-24
861
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
862
+
863
+ - name: deepseek-ai/deepseek-r1
864
+ display_name: DeepSeek R1
865
+ description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incoporates which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948))
866
+ creator_organization_name: DeepSeek
867
+ access: open
868
+ # NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
869
+ num_parameters: 685000000000
870
+ release_date: 2025-01-20
871
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
669
872
 
873
+ - name: deepseek-ai/deepseek-r1-hide-reasoning
874
+ display_name: DeepSeek R1 (hide reasoning)
875
+ description: DeepSeek R1 is DeepSeek's first-generation reasoning model which incoporates which incorporates multi-stage training and cold-start data before RL. ([paper](https://arxiv.org/abs/2501.12948)) The reasoning tokens are hidden from the output of the model.
876
+ creator_organization_name: DeepSeek
877
+ access: open
878
+ # NOTE: The total size of DeepSeek-R3 model1 on HuggingFace is 685B
879
+ num_parameters: 685000000000
880
+ release_date: 2025-01-20
881
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
882
+
670
883
  # EleutherAI
671
884
  - name: eleutherai/gpt-j-6b # Served by GooseAi, HuggingFace and Together.
672
885
  display_name: GPT-J (6B)
@@ -769,7 +982,7 @@ models:
769
982
  access: closed
770
983
  num_parameters: 540000000000
771
984
  release_date: 2023-03-01 # was first announced on 2022-04 but remained private.
772
- tags: [] # TODO: add tags
985
+ tags: [UNSUPPORTED_MODEL_TAG]
773
986
 
774
987
  # Note: This is aliased to a snapshot of gemini-pro. When possible, please use a versioned snapshot instead.
775
988
  - name: google/gemini-pro
@@ -819,7 +1032,7 @@ models:
819
1032
  creator_organization_name: Google
820
1033
  access: limited
821
1034
  release_date: 2024-05-24
822
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1035
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
823
1036
 
824
1037
  - name: google/gemini-1.5-flash-001
825
1038
  display_name: Gemini 1.5 Flash (001)
@@ -827,7 +1040,7 @@ models:
827
1040
  creator_organization_name: Google
828
1041
  access: limited
829
1042
  release_date: 2024-05-24
830
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1043
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
831
1044
 
832
1045
  - name: google/gemini-1.5-pro-preview-0409
833
1046
  display_name: Gemini 1.5 Pro (0409 preview)
@@ -885,6 +1098,70 @@ models:
885
1098
  release_date: 2024-05-24
886
1099
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
887
1100
 
1101
+ - name: google/gemini-1.5-pro-002
1102
+ display_name: Gemini 1.5 Pro (002)
1103
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
1104
+ creator_organization_name: Google
1105
+ access: limited
1106
+ release_date: 2024-09-24
1107
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1108
+
1109
+ - name: google/gemini-1.5-flash-002
1110
+ display_name: Gemini 1.5 Flash (002)
1111
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
1112
+ creator_organization_name: Google
1113
+ access: limited
1114
+ release_date: 2024-09-24
1115
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1116
+
1117
+ - name: google/gemini-2.0-flash-exp
1118
+ display_name: Gemini 2.0 Flash (Experimental)
1119
+ description: Gemini 2.0 Flash (Experimental) is a Gemini model that supports multimodal inputs like images, video and audio, as well as multimodal output like natively generated images mixed with text and steerable text-to-speech (TTS) multilingual audio. ([blog](https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#gemini-2-0-flash))
1120
+ creator_organization_name: Google
1121
+ access: limited
1122
+ release_date: 2024-12-11
1123
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1124
+
1125
+ - name: google/gemini-1.5-flash-8b-001
1126
+ display_name: Gemini 1.5 Flash 8B
1127
+ description: Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks. ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1128
+ creator_organization_name: Google
1129
+ access: limited
1130
+ release_date: 2024-10-01
1131
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1132
+
1133
+ - name: google/gemini-2.0-flash-001
1134
+ display_name: Gemini 2.0 Flash
1135
+ description: Gemini 2.0 Flash ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1136
+ creator_organization_name: Google
1137
+ access: limited
1138
+ release_date: 2025-02-01
1139
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1140
+
1141
+ - name: google/gemini-2.0-flash-lite-preview-02-05
1142
+ display_name: Gemini 2.0 Flash Lite (02-05 preview)
1143
+ description: Gemini 2.0 Flash Lite (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1144
+ creator_organization_name: Google
1145
+ access: limited
1146
+ release_date: 2025-02-05
1147
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1148
+
1149
+ - name: google/gemini-2.0-flash-thinking-exp-01-21
1150
+ display_name: Gemini 2.0 Flash Thinking (01-21 preview)
1151
+ description: Gemini 2.0 Flash Thinking (01-21 preview) ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/thinking))
1152
+ creator_organization_name: Google
1153
+ access: limited
1154
+ release_date: 2025-01-21
1155
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1156
+
1157
+ - name: google/gemini-2.0-pro-exp-02-05
1158
+ display_name: Gemini 2.0 Pro (02-05 preview)
1159
+ description: Gemini 2.0 Pro (02-05 preview) ([documentation](https://ai.google.dev/gemini-api/docs/models/gemini))
1160
+ creator_organization_name: Google
1161
+ access: limited
1162
+ release_date: 2025-02-05
1163
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, AUDIO_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1164
+
888
1165
  - name: google/gemma-2b
889
1166
  display_name: Gemma (2B)
890
1167
  description: Gemma is a family of lightweight, open models built from the research and technology that Google used to create the Gemini models. ([model card](https://www.kaggle.com/models/google/gemma), [blog post](https://blog.google/technology/developers/gemma-open-models/))
@@ -1304,7 +1581,7 @@ models:
1304
1581
  access: open
1305
1582
  num_parameters: 175000000000
1306
1583
  release_date: 2022-12-22
1307
- tags: [] # TODO: add tags
1584
+ tags: [UNSUPPORTED_MODEL_TAG]
1308
1585
 
1309
1586
  - name: meta/opt-iml-30b # NOT SUPPORTED
1310
1587
  display_name: OPT-IML (30B)
@@ -1313,7 +1590,7 @@ models:
1313
1590
  access: open
1314
1591
  num_parameters: 30000000000
1315
1592
  release_date: 2022-12-22
1316
- tags: [] # TODO: add tags
1593
+ tags: [UNSUPPORTED_MODEL_TAG]
1317
1594
 
1318
1595
  - name: meta/opt-175b
1319
1596
  display_name: OPT (175B)
@@ -1360,7 +1637,7 @@ models:
1360
1637
  access: open
1361
1638
  num_parameters: 120000000000
1362
1639
  release_date: 2022-11-15
1363
- tags: [] # TODO: add tags
1640
+ tags: [UNSUPPORTED_MODEL_TAG]
1364
1641
 
1365
1642
  - name: meta/galactica-30b # NOT SUPPORTED
1366
1643
  display_name: Galactica (30B)
@@ -1369,7 +1646,7 @@ models:
1369
1646
  access: open
1370
1647
  num_parameters: 30000000000
1371
1648
  release_date: 2022-11-15
1372
- tags: [] # TODO: add tags
1649
+ tags: [UNSUPPORTED_MODEL_TAG]
1373
1650
 
1374
1651
  - name: meta/llama-7b
1375
1652
  display_name: LLaMA (7B)
@@ -1490,6 +1767,33 @@ models:
1490
1767
  release_date: 2024-07-18
1491
1768
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1492
1769
 
1770
+ - name: meta/llama-3.1-8b-instruct
1771
+ display_name: Llama 3.1 Instruct (8B)
1772
+ description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1773
+ creator_organization_name: Meta
1774
+ access: open
1775
+ num_parameters: 8000000000
1776
+ release_date: 2024-07-23
1777
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1778
+
1779
+ - name: meta/llama-3.1-70b-instruct
1780
+ display_name: Llama 3.1 Instruct (70B)
1781
+ description: Llama 3.1 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1782
+ creator_organization_name: Meta
1783
+ access: open
1784
+ num_parameters: 70000000000
1785
+ release_date: 2024-07-23
1786
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1787
+
1788
+ - name: meta/llama-3.1-405b-instruct
1789
+ display_name: Llama 3.1 Instruct (405B)
1790
+ description: Llama 3.1 (405B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1791
+ creator_organization_name: Meta
1792
+ access: open
1793
+ num_parameters: 405000000000
1794
+ release_date: 2024-07-23
1795
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1796
+
1493
1797
  - name: meta/llama-3.1-8b-instruct-turbo
1494
1798
  display_name: Llama 3.1 Instruct Turbo (8B)
1495
1799
  description: Llama 3.1 (8B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/), [blog](https://ai.meta.com/blog/meta-llama-3-1/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1517,6 +1821,15 @@ models:
1517
1821
  release_date: 2024-07-23
1518
1822
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1519
1823
 
1824
+ - name: meta/llama-3.2-1b-instruct
1825
+ display_name: Llama 3.2 Instruct (1.23B)
1826
+ description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/))
1827
+ creator_organization_name: Meta
1828
+ access: open
1829
+ num_parameters: 1230000000
1830
+ release_date: 2024-09-25
1831
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1832
+
1520
1833
  - name: meta/llama-3.2-3b-instruct-turbo
1521
1834
  display_name: Llama 3.2 Instruct Turbo (3B)
1522
1835
  description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
@@ -1533,7 +1846,7 @@ models:
1533
1846
  access: open
1534
1847
  num_parameters: 10700000000
1535
1848
  release_date: 2024-09-25
1536
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG. LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1849
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1537
1850
 
1538
1851
  - name: meta/llama-3.2-90b-vision-instruct-turbo
1539
1852
  display_name: Llama 3.2 Vision Instruct Turbo (90B)
@@ -1542,7 +1855,25 @@ models:
1542
1855
  access: open
1543
1856
  num_parameters: 88600000000
1544
1857
  release_date: 2024-09-25
1545
- tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG. LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1858
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1859
+
1860
+ - name: meta/llama-3.3-70b-instruct-turbo
1861
+ display_name: Llama 3.3 Instruct Turbo (70B)
1862
+ description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
1863
+ creator_organization_name: Meta
1864
+ access: open
1865
+ num_parameters: 70000000000
1866
+ release_date: 2024-12-06
1867
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1868
+
1869
+ - name: meta/llama-3.3-70b-instruct
1870
+ display_name: Llama 3.3 Instruct (70B)
1871
+ description: Llama 3.3 (70B) is part of the Llama 3 family of dense Transformer models that natively support multilinguality, coding, reasoning, and tool usage. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/))
1872
+ creator_organization_name: Meta
1873
+ access: open
1874
+ num_parameters: 70000000000
1875
+ release_date: 2024-12-06
1876
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1546
1877
 
1547
1878
  - name: meta/llama-3-8b-chat
1548
1879
  display_name: Llama 3 Instruct (8B)
@@ -1698,6 +2029,24 @@ models:
1698
2029
  num_parameters: 14000000000
1699
2030
  release_date: 2024-05-21
1700
2031
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2032
+
2033
+ - name: microsoft/phi-3.5-mini-instruct
2034
+ display_name: Phi-3.5-mini-instruct (3.8B)
2035
+ description: Phi-3.5-mini is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available websites. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
2036
+ creator_organization_name: Microsoft
2037
+ access: open
2038
+ num_parameters: 3800000000
2039
+ release_date: 2024-08-22
2040
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2041
+
2042
+ - name: microsoft/phi-3.5-moe-instruct
2043
+ display_name: Phi-3.5 MoE
2044
+ description: Phi-3.5 MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents - with a focus on very high-quality, reasoning dense data. ([paper](https://arxiv.org/abs/2404.14219), [blog](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/discover-the-new-multi-lingual-high-quality-phi-3-5-slms/4225280))
2045
+ creator_organization_name: Microsoft
2046
+ access: open
2047
+ num_parameters: 41900000000
2048
+ release_date: 2024-08-22
2049
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1701
2050
 
1702
2051
  # KAIST AI
1703
2052
  - name: kaistai/prometheus-vision-13b-v1.0-hf
@@ -1837,6 +2186,15 @@ models:
1837
2186
  num_parameters: 7300000000
1838
2187
  release_date: 2024-05-22
1839
2188
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2189
+
2190
+ - name: mistralai/mistral-7b-instruct-v0.3-hf
2191
+ display_name: Mistral Instruct v0.3 (7B)
2192
+ description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
2193
+ creator_organization_name: Mistral AI
2194
+ access: open
2195
+ num_parameters: 7300000000
2196
+ release_date: 2024-05-22
2197
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1840
2198
 
1841
2199
  - name: mistralai/mixtral-8x7b-32kseqlen
1842
2200
  display_name: Mixtral (8x7B 32K seqlen)
@@ -1884,6 +2242,22 @@ models:
1884
2242
  release_date: 2023-10-16
1885
2243
  tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
1886
2244
 
2245
+ - name: mistralai/ministral-3b-2410
2246
+ display_name: Ministral 3B (2410)
2247
+ description: Ministral 3B (2410) is a model for on-device computing and at-the-edge use cases ([blog](https://mistral.ai/news/ministraux/)).
2248
+ creator_organization_name: Mistral AI
2249
+ access: limited
2250
+ release_date: 2024-10-16
2251
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2252
+
2253
+ - name: mistralai/ministral-8b-2410
2254
+ display_name: Ministral 8B (2410)
2255
+ description: Ministral 8B (2410) is a model for on-device computing and at-the-edge use cases, with a special interleaved sliding-window attention pattern for faster and memory-efficient inference ([blog](https://mistral.ai/news/ministraux/)).
2256
+ creator_organization_name: Mistral AI
2257
+ access: open
2258
+ release_date: 2024-10-16
2259
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2260
+
1887
2261
  - name: mistralai/mistral-small-2402
1888
2262
  display_name: Mistral Small (2402)
1889
2263
  description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
@@ -1892,6 +2266,32 @@ models:
1892
2266
  release_date: 2024-02-26
1893
2267
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1894
2268
 
2269
+ - name: mistralai/mistral-small-2409
2270
+ display_name: Mistral Small (2409)
2271
+ description: Mistral Small is a multilingual model with a 32K tokens context window and function-calling capabilities. ([blog](https://mistral.ai/news/mistral-large/))
2272
+ creator_organization_name: Mistral AI
2273
+ access: limited
2274
+ release_date: 2024-09-18
2275
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2276
+
2277
+ - name: mistralai/mistral-small-2501
2278
+ display_name: Mistral Small 3 (2501)
2279
+ description: Mistral Small 3 (2501) is a pre-trained and instructed model catered to the '80%' of generative AI tasks—those that require robust language and instruction following performance, with very low latency. ([blog](https://mistral.ai/news/mistral-small-3/))
2280
+ creator_organization_name: Mistral AI
2281
+ access: open
2282
+ num_parameters: 23600000000
2283
+ release_date: 2025-01-30
2284
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2285
+
2286
+ - name: mistralai/mistral-small-2503
2287
+ display_name: Mistral Small 3.1 (2503)
2288
+ description: Mistral Small 3.1 (2503) is a model with improved text performance, multimodal understanding, and an expanded context window of up to 128k tokens. ([blog](https://mistral.ai/news/mistral-small-3-1))
2289
+ creator_organization_name: Mistral AI
2290
+ access: open
2291
+ num_parameters: 23600000000
2292
+ release_date: 2025-03-17
2293
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2294
+
1895
2295
  - name: mistralai/mistral-medium-2312
1896
2296
  display_name: Mistral Medium (2312)
1897
2297
  description: Mistral is a transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
@@ -1917,6 +2317,15 @@ models:
1917
2317
  release_date: 2023-07-24
1918
2318
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1919
2319
 
2320
+ - name: mistralai/mistral-large-2411
2321
+ display_name: Mistral Large (2411)
2322
+ description: Mistral Large (2411) is a 123B parameter model that has a 128k context window. ([blog](https://mistral.ai/news/pixtral-large/))
2323
+ creator_organization_name: Mistral AI
2324
+ access: open
2325
+ num_parameters: 123000000000
2326
+ release_date: 2024-11-18
2327
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2328
+
1920
2329
  - name: mistralai/open-mistral-nemo-2407
1921
2330
  display_name: Mistral NeMo (2407)
1922
2331
  description: Mistral NeMo is a multilingual 12B model with a large context window of 128K tokens. ([blog](https://mistral.ai/news/mistral-nemo/))
@@ -1925,6 +2334,24 @@ models:
1925
2334
  release_date: 2024-07-18
1926
2335
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
1927
2336
 
2337
+ - name: mistralai/pixtral-12b-2409
2338
+ display_name: Mistral Pixtral (2409)
2339
+ description: Mistral Pixtral 12B is the first multimodal Mistral model for image understanding. ([blog](https://mistral.ai/news/pixtral-12b/))
2340
+ creator_organization_name: Mistral AI
2341
+ access: open
2342
+ release_date: 2024-09-17
2343
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2344
+
2345
+ - name: mistralai/pixtral-large-2411
2346
+ display_name: Mistral Pixtral Large (2411)
2347
+ description: Mistral Pixtral Large is a 124B open-weights multimodal model built on top of Mistral Large 2 (2407). ([blog](https://mistral.ai/news/pixtral-large/))
2348
+ creator_organization_name: Mistral AI
2349
+ access: open
2350
+ num_parameters: 124000000000
2351
+ release_date: 2024-11-18
2352
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2353
+
2354
+
1928
2355
  # MosaicML
1929
2356
  - name: mosaicml/mpt-7b
1930
2357
  display_name: MPT (7B)
@@ -1942,7 +2369,7 @@ models:
1942
2369
  access: open
1943
2370
  num_parameters: 6700000000
1944
2371
  release_date: 2023-05-05
1945
- tags: [] # TODO: add tags
2372
+ tags: [UNSUPPORTED_MODEL_TAG]
1946
2373
 
1947
2374
  - name: mosaicml/mpt-instruct-7b
1948
2375
  display_name: MPT-Instruct (7B)
@@ -1969,7 +2396,7 @@ models:
1969
2396
  access: open
1970
2397
  num_parameters: 30000000000
1971
2398
  release_date: 2023-06-22
1972
- tags: [] # TODO: add tags
2399
+ tags: [UNSUPPORTED_MODEL_TAG]
1973
2400
 
1974
2401
  - name: mosaicml/mpt-instruct-30b
1975
2402
  display_name: MPT-Instruct (30B)
@@ -1981,6 +2408,27 @@ models:
1981
2408
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
1982
2409
 
1983
2410
 
2411
+
2412
+ # NECTEC
2413
+ - name: nectec/Pathumma-llm-text-1.0.0
2414
+ display_name: Pathumma-llm-text-1.0.0 (7B)
2415
+ description: Pathumma-llm-text-1.0.0 (7B) is a instruction model from OpenThaiLLM-Prebuilt-7B ([blog](https://medium.com/nectec/pathummallm-v-1-0-0-release-6a098ddfe276))
2416
+ creator_organization_name: nectec
2417
+ access: open
2418
+ num_parameters: 7620000000
2419
+ release_date: 2024-10-28
2420
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2421
+
2422
+ - name: nectec/OpenThaiLLM-Prebuilt-7B
2423
+ display_name: OpenThaiLLM-Prebuilt-7B (7B)
2424
+ description: OpenThaiLLM-Prebuilt-7B (7B) is a pretrained Thai large language model with 7 billion parameters based on Qwen2.5-7B.
2425
+ creator_organization_name: nectec
2426
+ access: open
2427
+ num_parameters: 7620000000
2428
+ release_date: 2024-10-28
2429
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
2430
+
2431
+
1984
2432
 
1985
2433
  # Neurips
1986
2434
  - name: neurips/local
@@ -2010,6 +2458,16 @@ models:
2010
2458
  release_date: 2024-06-17
2011
2459
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2012
2460
 
2461
+ - name: nvidia/llama-3.1-nemotron-70b-instruct
2462
+ display_name: Llama 3.1 Nemotron Instruct (70B)
2463
+ description: Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve the helpfulness of LLM generated responses to user queries. It was trained using RLHF (specifically, REINFORCE), Llama-3.1-Nemotron-70B-Reward and HelpSteer2-Preference prompts on a Llama-3.1-70B-Instruct model. ([paper](https://arxiv.org/abs/2410.01257))
2464
+ creator_organization_name: NVIDIA
2465
+ access: open
2466
+ num_parameters: 70000000000
2467
+ release_date: 2024-10-02
2468
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2469
+
2470
+
2013
2471
  # OpenAI
2014
2472
 
2015
2473
  ## GPT 2 Models
@@ -2194,7 +2652,7 @@ models:
2194
2652
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2195
2653
 
2196
2654
 
2197
- ## GPT 4 Models
2655
+ ## GPT-4 and GPT-4 Turbo
2198
2656
 
2199
2657
  - name: openai/gpt-4-1106-preview
2200
2658
  display_name: GPT-4 Turbo (1106 preview)
@@ -2246,6 +2704,8 @@ models:
2246
2704
  release_date: 2024-01-25
2247
2705
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2248
2706
 
2707
+ ## GPT-4o
2708
+
2249
2709
  - name: openai/gpt-4-turbo-2024-04-09
2250
2710
  display_name: GPT-4 Turbo (2024-04-09)
2251
2711
  description: GPT-4 Turbo (2024-04-09) is a large multimodal model that is optimized for chat but works well for traditional completions tasks. The model is cheaper and faster than the original GPT-4 model. Snapshot from 2024-04-09.
@@ -2270,6 +2730,14 @@ models:
2270
2730
  release_date: 2024-08-06
2271
2731
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2272
2732
 
2733
+ - name: openai/gpt-4o-2024-11-20
2734
+ display_name: GPT-4o (2024-11-20)
2735
+ description: GPT-4o (2024-11-20) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs. ([blog](https://openai.com/index/introducing-structured-outputs-in-the-api/))
2736
+ creator_organization_name: OpenAI
2737
+ access: limited
2738
+ release_date: 2024-11-20
2739
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2740
+
2273
2741
  - name: openai/gpt-4o-mini-2024-07-18
2274
2742
  display_name: GPT-4o mini (2024-07-18)
2275
2743
  description: GPT-4o mini (2024-07-18) is a multimodal model with a context window of 128K tokens and improved handling of non-English text. ([blog](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/))
@@ -2278,6 +2746,40 @@ models:
2278
2746
  release_date: 2024-07-18
2279
2747
  tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2280
2748
 
2749
+ - name: openai/whisper-1_gpt-4o-2024-11-20
2750
+ display_name: Whisper-1 + GPT-4o (2024-11-20)
2751
+ description: Transcribes the audio with Whisper-1 and then uses GPT-4o to generate a response.
2752
+ creator_organization_name: OpenAI
2753
+ access: limited
2754
+ release_date: 2024-11-20
2755
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG]
2756
+
2757
+ - name: openai/gpt-4o-audio-preview-2024-10-01
2758
+ display_name: GPT-4o Audio (Preview 2024-10-01)
2759
+ description: GPT-4o Audio (Preview 2024-10-01) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
2760
+ creator_organization_name: OpenAI
2761
+ access: limited
2762
+ release_date: 2024-10-01
2763
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2764
+
2765
+ - name: openai/gpt-4o-audio-preview-2024-12-17
2766
+ display_name: GPT-4o Audio (Preview 2024-12-17)
2767
+ description: GPT-4o Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
2768
+ creator_organization_name: OpenAI
2769
+ access: limited
2770
+ release_date: 2024-12-17
2771
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2772
+
2773
+ - name: openai/gpt-4o-mini-audio-preview-2024-12-17
2774
+ display_name: GPT-4o mini Audio (Preview 2024-12-17)
2775
+ description: GPT-4o mini Audio (Preview 2024-12-17) is a preview model that allows using audio inputs to prompt the model ([documentation](https://platform.openai.com/docs/guides/audio)).
2776
+ creator_organization_name: OpenAI
2777
+ access: limited
2778
+ release_date: 2024-12-17
2779
+ tags: [AUDIO_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2780
+
2781
+ # GPT-4V
2782
+
2281
2783
  - name: openai/gpt-4-vision-preview
2282
2784
  # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
2283
2785
  display_name: GPT-4V (1106 preview)
@@ -2295,7 +2797,40 @@ models:
2295
2797
  release_date: 2023-11-06
2296
2798
  tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2297
2799
 
2800
+ ## GPT-4.5
2801
+ - name: openai/gpt-4.5-preview-2025-02-27
2802
+ display_name: GPT-4.5 (2025-02-27 preview)
2803
+ description: GPT-4.5 (2025-02-27 preview) is a large multimodal model that is designed to be more general-purpose than OpenAI's STEM-focused reasoning models. It was trained using new supervision techniques combined with traditional methods like supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). ([blog](https://openai.com/index/introducing-gpt-4-5/), [system card](https://openai.com/index/gpt-4-5-system-card/))
2804
+ creator_organization_name: OpenAI
2805
+ access: limited
2806
+ release_date: 2025-02-27
2807
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2808
+
2298
2809
  ## o1 Models
2810
+ - name: openai/o1-2024-12-17
2811
+ display_name: o1 (2024-12-17)
2812
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
2813
+ creator_organization_name: OpenAI
2814
+ access: limited
2815
+ release_date: 2024-12-17
2816
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2817
+
2818
+ - name: openai/o1-2024-12-17-low-reasoning-effort
2819
+ display_name: o1 (2024-12-17, low reasoning effort)
2820
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low.
2821
+ creator_organization_name: OpenAI
2822
+ access: limited
2823
+ release_date: 2024-12-17
2824
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2825
+
2826
+ - name: openai/o1-2024-12-17-high-reasoning-effort
2827
+ display_name: o1 (2024-12-17, high reasoning effort)
2828
+ description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high.
2829
+ creator_organization_name: OpenAI
2830
+ access: limited
2831
+ release_date: 2024-12-17
2832
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2833
+
2299
2834
  - name: openai/o1-preview-2024-09-12
2300
2835
  display_name: o1-preview (2024-09-12)
2301
2836
  description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
@@ -2312,6 +2847,30 @@ models:
2312
2847
  release_date: 2024-09-12
2313
2848
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2314
2849
 
2850
+ - name: openai/o3-mini-2025-01-31
2851
+ display_name: o3-mini (2025-01-31)
2852
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/))
2853
+ creator_organization_name: OpenAI
2854
+ access: limited
2855
+ release_date: 2025-01-31
2856
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2857
+
2858
+ - name: openai/o3-mini-2025-01-31-low-reasoning-effort
2859
+ display_name: o3-mini (2025-01-31, low reasoning effort)
2860
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to low.
2861
+ creator_organization_name: OpenAI
2862
+ access: limited
2863
+ release_date: 2025-01-31
2864
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2865
+
2866
+ - name: openai/o3-mini-2025-01-31-high-reasoning-effort
2867
+ display_name: o3-mini (2025-01-31, high reasoning effort)
2868
+ description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to high.
2869
+ creator_organization_name: OpenAI
2870
+ access: limited
2871
+ release_date: 2025-01-31
2872
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2873
+
2315
2874
  ## Codex Models
2316
2875
  # DEPRECATED: Codex models have been shut down on March 23 2023.
2317
2876
 
@@ -2556,6 +3115,39 @@ models:
2556
3115
  release_date: 2024-06-07
2557
3116
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
2558
3117
 
3118
+ - name: qwen/qwen2.5-7b-instruct-turbo
3119
+ display_name: Qwen2.5 Instruct Turbo (7B)
3120
+ description: Qwen2.5 Instruct Turbo (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structure data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3121
+ creator_organization_name: Qwen
3122
+ access: open
3123
+ release_date: 2024-09-19
3124
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3125
+
3126
+ - name: qwen/qwen2.5-7b-instruct
3127
+ display_name: Qwen2.5 Instruct (7B)
3128
+ description: Qwen2.5 Instruct (7B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structure data. ([blog](https://qwenlm.github.io/blog/qwen2.5/))
3129
+ creator_organization_name: Qwen
3130
+ access: open
3131
+ release_date: 2024-09-19
3132
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3133
+
3134
+ - name: qwen/qwen2.5-72b-instruct-turbo
3135
+ display_name: Qwen2.5 Instruct Turbo (72B)
3136
+ description: Qwen2.5 Instruct Turbo (72B) was trained on 18 trillion tokens and supports 29 languages, and shows improvements over Qwen2 in knowledge, coding, mathematics, instruction following, generating long texts, and processing structure data. ([blog](https://qwenlm.github.io/blog/qwen2.5/)) Turbo is Together's cost-efficient implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
3137
+ creator_organization_name: Qwen
3138
+ access: open
3139
+ release_date: 2024-09-19
3140
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3141
+
3142
+ - name: qwen/qwq-32b-preview
3143
+ display_name: QwQ (32B Preview)
3144
+ description: QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities. ([blog post](https://qwenlm.github.io/blog/qwq-32b-preview/)).
3145
+ creator_organization_name: Alibaba Cloud
3146
+ access: open
3147
+ num_parameters: 32800000000
3148
+ release_date: 2024-11-28
3149
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3150
+
2559
3151
  - name: qwen/qwen-vl
2560
3152
  display_name: Qwen-VL
2561
3153
  description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
@@ -2572,6 +3164,38 @@ models:
2572
3164
  release_date: 2023-08-24
2573
3165
  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
2574
3166
 
3167
+ - name: qwen/qwen2-vl-7b-instruct
3168
+ display_name: Qwen2-VL Instruct (7B)
3169
+ description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
3170
+ creator_organization_name: Alibaba Group
3171
+ access: open
3172
+ release_date: 2024-08-29
3173
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3174
+
3175
+ - name: qwen/qwen2-vl-72b-instruct
3176
+ display_name: Qwen2-VL Instruct (72B)
3177
+ description: The second generation of Qwen2-VL models ([paper](https://arxiv.org/abs/2409.12191)).
3178
+ creator_organization_name: Alibaba Group
3179
+ access: open
3180
+ release_date: 2024-08-29
3181
+ tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
3182
+
3183
+ - name: qwen/qwen-audio-chat
3184
+ display_name: Qwen-Audio Chat
3185
+ description: Auditory multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2311.07919)).
3186
+ creator_organization_name: Alibaba Cloud
3187
+ access: open
3188
+ release_date: 2023-11-14
3189
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3190
+
3191
+ - name: qwen/qwen2-audio-7b-instruct
3192
+ display_name: Qwen2-Audio Instruct (7B)
3193
+ description: The second generation of the auditory multimodal Qwen large language model series ([paper](https://arxiv.org/abs/2407.10759)).
3194
+ creator_organization_name: Alibaba Cloud
3195
+ access: open
3196
+ release_date: 2024-07-15
3197
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3198
+
2575
3199
  # SAIL (Sea AI Lab)
2576
3200
  - name: sail/sailor-7b
2577
3201
  display_name: Sailor (7B)
@@ -2617,7 +3241,7 @@ models:
2617
3241
  access: open
2618
3242
  num_parameters: 16000000000
2619
3243
  release_date: 2022-03-25
2620
- tags: [] # TODO: add tags
3244
+ tags: [UNSUPPORTED_MODEL_TAG]
2621
3245
 
2622
3246
  # SambaNova
2623
3247
  - name: sambanova/sambalingo-thai-base
@@ -2769,8 +3393,6 @@ models:
2769
3393
  release_date: 2023-04-20
2770
3394
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
2771
3395
 
2772
-
2773
-
2774
3396
  # Stanford
2775
3397
  - name: stanford/alpaca-7b
2776
3398
  display_name: Alpaca (7B)
@@ -2866,7 +3488,7 @@ models:
2866
3488
  access: open
2867
3489
  num_parameters: 3000000000
2868
3490
  release_date: 2023-05-05
2869
- tafs: [] # TODO: add tags
3491
+ tags: [UNSUPPORTED_MODEL_TAG]
2870
3492
 
2871
3493
  - name: together/redpajama-incite-base-7b
2872
3494
  display_name: RedPajama-INCITE-Base (7B)
@@ -2917,9 +3539,27 @@ models:
2917
3539
  access: open
2918
3540
  num_parameters: 13000000000
2919
3541
  release_date: 2022-09-19
2920
- tags: [] # TODO: add tags
3542
+ tags: [UNSUPPORTED_MODEL_TAG]
2921
3543
 
3544
+ # Upstage
3545
+ - name: upstage/solar-pro-preview-instruct
3546
+ display_name: Solar Pro Preview (22B)
3547
+ description: Solar Pro Preview (22B) is open-weights model for single GPU inference that is a preview of the upcoming Solar Pro model ([blog](https://www.upstage.ai/products/solar-pro-preview)).
3548
+ creator_organization_name: Upstage
3549
+ access: open
3550
+ num_parameters: 22000000000
3551
+ release_date: 2024-09-11
3552
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2922
3553
 
3554
+ - name: upstage/solar-pro-241126
3555
+ display_name: Solar Pro
3556
3557
+ description: Solar Pro is a LLM designed for instruction-following and processing structured formats like HTML and Markdown. It supports English, Korean, and Japanese and has domain expertise in Finance, Healthcare, and Legal. ([blog](https://www.upstage.ai/blog/press/solar-pro-aws)).
3558
+ creator_organization_name: Upstage
3559
+ access: limited
3560
+ num_parameters: 22000000000
3561
+ release_date: 2024-11-26
3562
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
2923
3563
 
2924
3564
  # Writer
2925
3565
  - name: writer/palmyra-base
@@ -3057,6 +3697,24 @@ models:
3057
3697
  release_date: 2024-07-31
3058
3698
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3059
3699
 
3700
+ - name: writer/palmyra-fin
3701
+ display_name: Palmyra Fin
3702
+ description: Palmyra Fin is a financial LLM built by combining a well-curated set of financial training data with custom fine-tuning instruction data ([blog](https://writer.com/blog/palmyra-med-fin-models/)).
3703
+ creator_organization_name: Writer
3704
+ access: limited
3705
+ release_date: 2024-07-31
3706
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3707
+
3708
+ # xAI
3709
+
3710
+ - name: xai/grok-beta
3711
+ display_name: Grok Beta
3712
+ description: Grok Beta is a model from xAI.
3713
+ creator_organization_name: xAI
3714
+ access: closed
3715
+ release_date: 2024-08-13
3716
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3717
+
3060
3718
  # Yandex
3061
3719
  - name: yandex/yalm
3062
3720
  display_name: YaLM (100B)
@@ -3128,3 +3786,286 @@ models:
3128
3786
  release_date: 2024-04-18
3129
3787
  tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
3130
3788
 
3789
+ # Diva Llama
3790
+ - name: stanford/diva-llama
3791
+ display_name: Diva Llama 3 (8B)
3792
+ description: Diva Llama 3 is an end-to-end Voice Assistant Model which can handle speech and text as inputs. It was trained using distillation loss. ([paper](https://arxiv.org/abs/2410.02678))
3793
+ creator_organization_name: Stanford
3794
+ access: open
3795
+ num_parameters: 8000000000
3796
+ release_date: 2024-10-03
3797
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3798
+
3799
+
3800
+ # LLaMA-Omni
3801
+ - name: ictnlp/llama-3.1-8b-omni
3802
+ display_name: LLaMA-Omni (8B)
3803
+ description: The speech-interaction multimodal version of the LLaMA 3.1 model, supporting speech and text inputs ([paper](https://arxiv.org/abs/2409.06666)).
3804
+ creator_organization_name: ICTNLP
3805
+ access: open
3806
+ num_parameters: 8000000000
3807
+ release_date: 2024-09-10
3808
+ tags: [AUDIO_LANGUAGE_MODEL_TAG]
3809
+
3810
+ # Granite - IBM
3811
+ # https://www.ibm.com/granite
3812
+ # https://github.com/ibm-granite/granite-3.0-language-models
3813
+
3814
+ - name: ibm-granite/granite-3.0-2b-base
3815
+ display_name: Granite 3.0 base (2B)
3816
+ description: Granite-3.0-2B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
3817
+ creator_organization_name: IBM
3818
+ access: open
3819
+ num_parameters: 2530000000
3820
+ release: 2024-10-21
3821
+ tags: [TEXT_MODEL_TAG]
3822
+
3823
+ - name: ibm-granite/granite-3.0-2b-instruct
3824
+ display_name: Granite 3.0 Instruct (2B)
3825
+ description: Granite-3.0-2B-Instruct is a 2B parameter model finetuned from Granite-3.0-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3826
+ creator_organization_name: IBM
3827
+ access: open
3828
+ num_parameters: 2630000000
3829
+ release: 2024-10-21
3830
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3831
+
3832
+ - name: ibm-granite/granite-3.0-8b-instruct
3833
+ display_name: Granite 3.0 instruct (8B)
3834
+ description: Granite-3.0-8B-Instruct is a 8B parameter model finetuned from Granite-3.0-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3835
+ creator_organization_name: IBM
3836
+ access: open
3837
+ num_parameters: 8170000000
3838
+ release: 2024-10-21
3839
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3840
+
3841
+ - name: ibm-granite/granite-3.0-8b-base
3842
+ display_name: Granite 3.0 base (8B)
3843
+ description: Granite-3.0-8B-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
3844
+ creator_organization_name: IBM
3845
+ access: open
3846
+ num_parameters: 8170000000
3847
+ release: 2024-10-21
3848
+ tags: [TEXT_MODEL_TAG]
3849
+
3850
+ - name: ibm-granite/granite-3.0-3b-a800m-instruct
3851
+ display_name: Granite 3.0 A800M instruct (3B)
3852
+ description: Granite-3.0-3B-A800M-Instruct is a 3B parameter model finetuned from Granite-3.0-3B-A800M-Base-4K using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3853
+ creator_organization_name: IBM
3854
+ access: open
3855
+ num_parameters: 3370000000
3856
+ release: 2024-10-21
3857
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3858
+
3859
+ - name: ibm-granite/granite-3.0-3b-a800m-base
3860
+ display_name: Granite 3.0 A800M base (3B)
3861
+ description: Granite-3.0-3B-A800M-Base is a decoder-only language model to support a variety of text-to-text generation tasks.
3862
+ creator_organization_name: IBM
3863
+ access: open
3864
+ num_parameters: 3370000000
3865
+ release: 2024-10-21
3866
+ tags: [TEXT_MODEL_TAG]
3867
+
3868
+ - name: ibm-granite/granite-3.0-1b-a400m-instruct
3869
+ display_name: Granite 3.0 A400M instruct (1B)
3870
+ description: Granite-3.0-1B-A400M-Instruct is an 1B parameter model finetuned from Granite-3.0-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets.
3871
+ creator_organization_name: IBM
3872
+ access: open
3873
+ num_parameters: 1330000000
3874
+ release: 2024-10-21
3875
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3876
+
3877
+ - name: ibm-granite/granite-3.0-1b-a400m-base
3878
+ display_name: Granite 3.0 A400M base (1B)
3879
+ description: Granite-3.0-1B-A400M-Base is a decoder-only language model to support a variety of text-to-text generation tasks. It is trained from scratch following a two-stage training strategy.
3880
+ creator_organization_name: IBM
3881
+ access: open
3882
+ num_parameters: 1380000000
3883
+ release: 2024-10-21
3884
+ tags: [TEXT_MODEL_TAG]
3885
+
3886
+ - name: maritaca-ai/sabia-7b
3887
+ display_name: Sabia 7B
3888
+ description: Sabia 7B
3889
+ creator_organization_name: MARITACA-AI
3890
+ access: open
3891
+ num_parameters: 6740000000
3892
+ release_date: 2023-11-08
3893
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3894
+
3895
+ # Granite-3.1-8b-base
3896
+ - name: ibm-granite/granite-3.1-8b-base
3897
+ display_name: Granite 3.1 - 8B - Base
3898
+ description: Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3899
+ creator_organization_name: IBM-GRANITE
3900
+ access: open
3901
+ num_parameters: 8170000000
3902
+ release_date: 2024-12-18
3903
+ tags: [TEXT_MODEL_TAG]
3904
+
3905
+ # Granite-3.1-8b-instruct
3906
+ - name: ibm-granite/granite-3.1-8b-instruct
3907
+ display_name: Granite 3.1 - 8B - Instruct
3908
+ description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3909
+ creator_organization_name: IBM
3910
+ access: open
3911
+ num_parameters: 8170000000
3912
+ release_date: 2024-12-18
3913
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3914
+
3915
+ # Granite-3.1-2b-instruct
3916
+ - name: ibm-granite/granite-3.1-2b-instruct
3917
+ display_name: Granite 3.1 - 2B - Instruct
3918
+ description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3919
+ creator_organization_name: IBM
3920
+ access: open
3921
+ num_parameters: 2530000000
3922
+ release_date: 2024-12-18
3923
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3924
+
3925
+ # Granite-3.1-2b-base
3926
+ - name: ibm-granite/granite-3.1-2b-base
3927
+ display_name: Granite 3.1 - 2B - Base
3928
+ description: Granite-3.1-2B-Base extends the context length of Granite-3.0-2B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3929
+ creator_organization_name: IBM-GRANITE
3930
+ access: open
3931
+ num_parameters: 2530000000
3932
+ release_date: 2024-12-18
3933
+ tags: [TEXT_MODEL_TAG]
3934
+
3935
+ # Granite-3.1-3b-a800m-instruct
3936
+ - name: ibm-granite/granite-3.1-3b-a800m-instruct
3937
+ display_name: Granite 3.1 - 3B - A800M - Instruct
3938
+ description: Granite-3.1-3B-A800M-Instruct is a 3B parameter long-context instruct model finetuned from Granite-3.1-3B-A800M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3939
+ creator_organization_name: IBM-GRANITE
3940
+ access: open
3941
+ num_parameters: 3300000000
3942
+ release_date: 2024-12-18
3943
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3944
+
3945
+ # Granite-3.1-3b-a800m-base
3946
+ - name: ibm-granite/granite-3.1-3b-a800m-base
3947
+ display_name: Granite 3.1 - 3B - A800M - Base
3948
+ description: Granite-3.1-3B-A800M-Base extends the context length of Granite-3.0-3B-A800M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3949
+ creator_organization_name: IBM-GRANITE
3950
+ access: open
3951
+ num_parameters: 3300000000
3952
+ release_date: 2024-12-18
3953
+ tags: [TEXT_MODEL_TAG]
3954
+
3955
+ # Granite-3.1-1b-a400m-instruct
3956
+ - name: ibm-granite/granite-3.1-1b-a400m-instruct
3957
+ display_name: Granite 3.1 - 1B - A400M - Instruct
3958
+ description: Granite-3.1-1B-A400M-Instruct is a 1B parameter long-context instruct model finetuned from Granite-3.1-1B-A400M-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
3959
+ creator_organization_name: IBM-GRANITE
3960
+ access: open
3961
+ num_parameters: 1330000000
3962
+ release_date: 2024-12-18
3963
+ tags: [TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3964
+
3965
+ # Granite-3.1-1b-a400m-base
3966
+ - name: ibm-granite/granite-3.1-1b-a400m-base
3967
+ display_name: Granite 3.1 - 1B - A400M - Base
3968
+ description: Granite-3.1-1B-A400M-Base extends the context length of Granite-3.0-1B-A400M-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K.
3969
+ creator_organization_name: IBM-GRANITE
3970
+ access: open
3971
+ num_parameters: 1330000000
3972
+ release_date: 2024-12-18
3973
+ tags: [TEXT_MODEL_TAG]
3974
+
3975
+ # DeepSeek-R1-Distill-Llama-3.1-8b
3976
+ - name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
3977
+ display_name: DeepSeek-R1-Distill-Llama-8b
3978
+ description: DeepSeek-R1-Distill-Llama-8b is a dense model distilled from DeepSeek-R1, using the Llama 3.1 8B model as its base.
3979
+ creator_organization_name: DeepSeek
3980
+ access: open
3981
+ num_parameters: 8000000000
3982
+ release_date: 2025-01-20
3983
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3984
+
3985
+ # deepseek-ai/deepseek-coder-6.7b-instruct
3986
+ - name: deepseek-ai/deepseek-coder-6.7b-instruct
3987
+ display_name: DeepSeek-Coder-6.7b-Instruct
3988
+ description: DeepSeek-Coder-6.7b-Instruct is a model that is fine-tuned from the DeepSeek-Coder-6.7b-Base model on instruction data for code tasks.
3989
+ creator_organization_name: DeepSeek
3990
+ access: open
3991
+ num_parameters: 6740000000
3992
+ release_date: 2025-01-20
3993
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
3994
+
3995
+ # WatsonX - IBM
3996
+ - name: ibm/granite-13b-instruct-v2
3997
+ display_name: Granite 13b instruct v2
3998
+ description: Granite Base (13B) Instruct V2.0 is a large decoder-only transformer language model.
3999
+ creator_organization_name: IBM
4000
+ access: limited
4001
+ num_parameters: 13000000000
4002
+ release: 2023-11-30
4003
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4004
+
4005
+ - name: ibm/granite-20b-code-instruct-8k
4006
+ display_name: Granite 20b code instruct (8K)
4007
+ description: Granite-20B-Code-Base-8K is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 3 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions.
4008
+ creator_organization_name: IBM
4009
+ access: limited
4010
+ num_parameters: 20000000000
4011
+ release_date: 2024-04-18
4012
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4013
+
4014
+ - name: ibm/granite-34b-code-instruct
4015
+ display_name: Granite 34b code instruct
4016
+ description: Granite Base (34B) Code Instruct is a 34B parameter model fine tuned from Granite-34B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
4017
+ creator_organization_name: IBM
4018
+ access: open
4019
+ num_parameters: 34000000000
4020
+ release: 2024-6-5
4021
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4022
+
4023
+
4024
+ - name: ibm/granite-3b-code-instruct
4025
+ display_name: Granite 3b code instruct
4026
+ description: Granite-3B-Code-Instruct-128K is a 3B parameter long-context instruct model fine tuned from Granite-3B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
4027
+ creator_organization_name: IBM
4028
+ access: open
4029
+ num_parameters: 3000000000
4030
+ release: 2024-6-18
4031
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4032
+
4033
+ - name: ibm/granite-8b-code-instruct
4034
+ display_name: Granite 8b code instruct
4035
+ description: Granite-8B-Code-Instruct-128K is a 8B parameter long-context instruct model fine tuned from Granite-8B-Code-Base-128K on a combination of permissively licensed data used in training the original Granite code instruct models, in addition to synthetically generated code instruction datasets tailored for solving long context problems. By exposing the model to both short and long context data, we aim to enhance its long-context capability without sacrificing code generation performance at short input context.
4036
+ creator_organization_name: IBM
4037
+ access: open
4038
+ num_parameters: 8000000000
4039
+ release: 2024-6-18
4040
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4041
+
4042
+
4043
+
4044
+
4045
+
4046
+
4047
+ - name: ibm/granite-3.1-8b-instruct
4048
+ display_name: Granite 3.1 - 8B - Instruct
4049
+ description: Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4050
+ creator_organization_name: IBM
4051
+ access: open
4052
+ num_parameters: 8170000000
4053
+ release_date: 2024-12-18
4054
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4055
+
4056
+ - name: ibm/granite-3.1-2b-instruct
4057
+ display_name: Granite 3.1 - 2B - Instruct
4058
+ description: Granite-3.1-2B-Instruct is a 2B parameter long-context instruct model finetuned from Granite-3.1-2B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems.
4059
+ creator_organization_name: IBM
4060
+ access: open
4061
+ num_parameters: 2530000000
4062
+ release_date: 2024-12-18
4063
+ tags: [ TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG ]
4064
+
4065
+ - name: mistralai/mixtral-8x7b-instruct-v0:1
4066
+ display_name: Mixtral 8x7B Instruct on IBM WatsonX
4067
+ description: A 7B sparse Mixture-of-Experts model with stronger capabilities than Mistral 7B. Uses 12B active parameters out of 45B total. Supports multiple languages, code and 32k context window.
4068
+ creator_organization_name: Mistral
4069
+ access: limited
4070
+ release_date: 2023-12-11
4071
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]