crfm-helm 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
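The same comparison can be reproduced locally. Below is a minimal sketch, assuming both wheels have already been downloaded (for example with `pip download crfm-helm==0.5.0 --no-deps` and `pip download crfm-helm==0.5.2 --no-deps`); the wheel filenames and the choice of `helm/config/model_metadata.yaml` as the target file are illustrative, not part of this page.

```python
import difflib
import zipfile

# Illustrative filenames: wheels are plain zip archives named crfm_helm-<version>-py3-none-any.whl.
OLD_WHEEL = "crfm_helm-0.5.0-py3-none-any.whl"
NEW_WHEEL = "crfm_helm-0.5.2-py3-none-any.whl"
TARGET = "helm/config/model_metadata.yaml"  # the file whose hunks are shown below


def read_member(wheel_path: str, member: str) -> list:
    """Read one file out of a wheel and return its lines."""
    with zipfile.ZipFile(wheel_path) as wheel:
        return wheel.read(member).decode("utf-8").splitlines(keepends=True)


old_lines = read_member(OLD_WHEEL, TARGET)
new_lines = read_member(NEW_WHEEL, TARGET)

# Emit a unified diff comparable to the hunks rendered below.
for line in difflib.unified_diff(
    old_lines, new_lines, fromfile="0.5.0/" + TARGET, tofile="0.5.2/" + TARGET
):
    print(line, end="")
```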

Potentially problematic release.


This version of crfm-helm might be problematic.

Files changed (125)
  1. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +19 -5
  2. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +121 -76
  3. helm/benchmark/adaptation/adapter_spec.py +32 -31
  4. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  5. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  6. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  7. helm/benchmark/annotation/air_bench_annotator.py +64 -0
  8. helm/benchmark/annotation/annotator_factory.py +6 -0
  9. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +1 -1
  10. helm/benchmark/annotation/live_qa_annotator.py +84 -0
  11. helm/benchmark/annotation/medication_qa_annotator.py +81 -0
  12. helm/benchmark/augmentations/perturbation.py +17 -1
  13. helm/benchmark/augmentations/test_perturbation.py +30 -0
  14. helm/benchmark/augmentations/translate_perturbation.py +1 -0
  15. helm/benchmark/huggingface_registration.py +16 -6
  16. helm/benchmark/metrics/air_bench_metrics.py +56 -0
  17. helm/benchmark/metrics/efficiency_metrics.py +9 -2
  18. helm/benchmark/metrics/evaluate_reference_metrics.py +16 -0
  19. helm/benchmark/metrics/fin_qa_metrics.py +60 -0
  20. helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
  21. helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
  22. helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
  23. helm/benchmark/metrics/live_qa_metrics.py +23 -0
  24. helm/benchmark/metrics/medication_qa_metrics.py +23 -0
  25. helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
  26. helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
  27. helm/benchmark/metrics/unitxt_metrics.py +20 -10
  28. helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
  29. helm/benchmark/metrics/vision_language/image_metrics.py +104 -21
  30. helm/benchmark/model_metadata_registry.py +5 -1
  31. helm/benchmark/presentation/schema.py +54 -4
  32. helm/benchmark/presentation/test_schema.py +11 -0
  33. helm/benchmark/run.py +16 -2
  34. helm/benchmark/run_expander.py +112 -63
  35. helm/benchmark/run_spec_factory.py +15 -10
  36. helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
  37. helm/benchmark/run_specs/classic_run_specs.py +15 -11
  38. helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
  39. helm/benchmark/run_specs/experimental_run_specs.py +33 -0
  40. helm/benchmark/run_specs/finance_run_specs.py +33 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +444 -65
  42. helm/benchmark/scenarios/air_bench_scenario.py +50 -0
  43. helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
  44. helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
  45. helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
  46. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  47. helm/benchmark/scenarios/math_scenario.py +1 -1
  48. helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
  49. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  50. helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
  51. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  52. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  53. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  54. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +4 -2
  55. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
  56. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
  57. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +1 -5
  58. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +5 -3
  59. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  60. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  61. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  62. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  63. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  64. helm/benchmark/scenarios/vision_language/pairs_scenario.py +247 -0
  65. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
  66. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
  67. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +2 -2
  68. helm/benchmark/scenarios/vision_language/vqa_scenario.py +4 -2
  69. helm/benchmark/static/schema_air_bench.yaml +3149 -0
  70. helm/benchmark/static/schema_classic.yaml +3 -59
  71. helm/benchmark/static/schema_finance.yaml +143 -0
  72. helm/benchmark/static/schema_image2structure.yaml +447 -0
  73. helm/benchmark/static/schema_instruction_following.yaml +3 -52
  74. helm/benchmark/static/schema_lite.yaml +3 -61
  75. helm/benchmark/static/schema_medical.yaml +255 -0
  76. helm/benchmark/static/schema_mmlu.yaml +3 -61
  77. helm/benchmark/static/schema_tables.yaml +200 -0
  78. helm/benchmark/static/schema_thai.yaml +223 -0
  79. helm/benchmark/static/schema_unitxt.yaml +3 -61
  80. helm/benchmark/static/schema_vhelm.yaml +824 -0
  81. helm/benchmark/static/schema_vhelm_lite.yaml +109 -0
  82. helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
  83. helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
  84. helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
  85. helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
  86. helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
  87. helm/benchmark/static_build/index.html +2 -2
  88. helm/clients/anthropic_client.py +78 -14
  89. helm/clients/auto_client.py +11 -0
  90. helm/clients/client.py +24 -7
  91. helm/clients/cohere_client.py +98 -3
  92. helm/clients/huggingface_client.py +71 -12
  93. helm/clients/openai_client.py +11 -5
  94. helm/clients/reka_client.py +189 -0
  95. helm/clients/test_client.py +3 -3
  96. helm/clients/test_huggingface_client.py +19 -3
  97. helm/clients/test_together_client.py +72 -2
  98. helm/clients/together_client.py +199 -2
  99. helm/clients/vertexai_client.py +117 -64
  100. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  101. helm/clients/vision_language/huggingface_vlm_client.py +12 -4
  102. helm/clients/vision_language/idefics_client.py +2 -2
  103. helm/clients/vision_language/paligemma_client.py +146 -0
  104. helm/clients/vision_language/palmyra_vision_client.py +84 -0
  105. helm/clients/yi_client.py +31 -0
  106. helm/common/critique_request.py +10 -1
  107. helm/common/images_utils.py +29 -3
  108. helm/config/model_deployments.yaml +504 -12
  109. helm/config/model_metadata.yaml +579 -52
  110. helm/config/tokenizer_configs.yaml +100 -1
  111. helm/proxy/critique/model_critique_client.py +32 -4
  112. helm/proxy/services/server_service.py +1 -1
  113. helm/tokenizers/auto_tokenizer.py +1 -1
  114. helm/tokenizers/cohere_tokenizer.py +44 -2
  115. helm/tokenizers/huggingface_tokenizer.py +36 -13
  116. helm/tokenizers/test_cohere_tokenizer.py +39 -0
  117. helm/tokenizers/test_huggingface_tokenizer.py +5 -1
  118. helm/benchmark/static/schema_vlm.yaml +0 -576
  119. helm/benchmark/static_build/assets/index-5088afcb.css +0 -1
  120. helm/benchmark/static_build/assets/index-d839df55.js +0 -9
  121. helm/benchmark/test_model_deployment_definition.py +0 -90
  122. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
  123. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
  124. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
  125. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
@@ -100,6 +100,25 @@ models:
  # - j2-large -> j2-light


+ # AI Singapore
+ - name: aisingapore/sea-lion-7b
+ display_name: SEA-LION (7B)
+ description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-02-24
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: aisingapore/sea-lion-7b-instruct
+ display_name: SEA-LION Instruct (7B)
+ description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
+ creator_organization_name: AI Singapore
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-02-24
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+

  # Aleph Alpha
  # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
@@ -189,7 +208,7 @@ models:

  # Anthropic
  - name: anthropic/claude-v1.3
- display_name: Anthropic Claude v1.3
+ display_name: Claude v1.3
  description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
  creator_organization_name: Anthropic
  access: limited
@@ -198,7 +217,7 @@ models:
  tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: anthropic/claude-instant-v1
- display_name: Anthropic Claude Instant V1
+ display_name: Claude Instant V1
  description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
  creator_organization_name: Anthropic
  access: limited
@@ -206,7 +225,7 @@ models:
  tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: anthropic/claude-instant-1.2
- display_name: Anthropic Claude Instant 1.2
+ display_name: Claude Instant 1.2
  description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
  creator_organization_name: Anthropic
  access: limited
@@ -214,7 +233,7 @@ models:
  tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: anthropic/claude-2.0
- display_name: Anthropic Claude 2.0
+ display_name: Claude 2.0
  description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
  creator_organization_name: Anthropic
  access: limited
@@ -222,7 +241,7 @@ models:
  tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: anthropic/claude-2.1
- display_name: Anthropic Claude 2.1
+ display_name: Claude 2.1
  description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
  creator_organization_name: Anthropic
  access: limited
@@ -231,7 +250,7 @@ models:

  - name: anthropic/claude-3-haiku-20240307
  display_name: Claude 3 Haiku (20240307)
- description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+ description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
  creator_organization_name: Anthropic
  access: limited
  release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
@@ -239,7 +258,7 @@ models:

  - name: anthropic/claude-3-sonnet-20240229
  display_name: Claude 3 Sonnet (20240229)
- description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+ description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
  creator_organization_name: Anthropic
  access: limited
  release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
@@ -247,9 +266,9 @@ models:

  - name: anthropic/claude-3-opus-20240229
  display_name: Claude 3 Opus (20240229)
- description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
- creator_organization_name: Anthropic
+ description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
  access: limited
+ creator_organization_name: Anthropic
  release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
  tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

@@ -327,6 +346,18 @@ models:
  release_date: 2023-05-09 # ArXiv submission date
  tags: [CODE_MODEL_TAG]

+ # BioMistral
+
+ - name: biomistral/biomistral-7b
+ display_name: BioMistral (7B)
+ description: BioMistral 7B is an open-source LLM tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.
+ creator_organization_name: BioMistral
+ access: open
+ num_parameters: 7300000000
+ release_date: 2024-02-15
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+


  # Cerebras Systems
@@ -418,7 +449,7 @@ models:
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: cohere/command-medium-beta # DEPRECATED
- display_name: Cohere Command beta (6.1B)
+ display_name: Command beta (6.1B)
  description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
  creator_organization_name: Cohere
  access: limited
@@ -427,7 +458,7 @@ models:
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: cohere/command-xlarge-beta # DEPRECATED
- display_name: Cohere Command beta (52.4B)
+ display_name: Command beta (52.4B)
  description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
  creator_organization_name: Cohere
  access: limited
@@ -436,7 +467,7 @@ models:
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: cohere/command
- display_name: Cohere Command
+ display_name: Command
  description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
  creator_organization_name: Cohere
  access: limited
@@ -444,12 +475,30 @@ models:
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: cohere/command-light
- display_name: Cohere Command Light
+ display_name: Command Light
  description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
  creator_organization_name: Cohere
  access: limited
  release_date: 2023-09-29
- tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: cohere/command-r
+ display_name: Command R
+ description: Command R is a multilingual 35B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
+ creator_organization_name: Cohere
+ access: open
+ num_parameters: 35000000000
+ release_date: 2024-03-11
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: cohere/command-r-plus
+ display_name: Command R Plus
+ description: Command R+ is a multilingual 104B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
+ creator_organization_name: Cohere
+ access: open
+ num_parameters: 104000000000
+ release_date: 2024-04-04
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  # Craiyon
  - name: craiyon/dalle-mini
@@ -534,7 +583,7 @@ models:
  access: open
  num_parameters: 132000000000
  release_date: 2024-03-27
- tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]


  # DeepMind
@@ -559,8 +608,8 @@ models:

  # Deepseek
  - name: deepseek-ai/deepseek-llm-67b-chat
- display_name: DeepSeek Chat (67B)
- description: DeepSeek Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
+ display_name: DeepSeek LLM Chat (67B)
+ description: DeepSeek LLM Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
  creator_organization_name: DeepSeek
  access: open
  num_parameters: 67000000000
@@ -624,7 +673,16 @@ models:
  release_date: 2023-02-13
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+ # EPFL LLM

+ - name: epfl-llm/meditron-7b
+ display_name: Meditron (7B)
+ description: Meditron-7B is a 7 billion parameter model adapted to the medical domain from Llama-2-7B through continued pretraining on a comprehensively curated medical corpus.
+ creator_organization_name: EPFL LLM
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-11-27
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

  # Google
  - name: google/t5-11b
@@ -670,15 +728,23 @@ models:
  creator_organization_name: Google
  access: limited
  release_date: 2023-12-13
- tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: google/gemini-1.0-pro-001
- display_name: Gemini 1.0 Pro
+ display_name: Gemini 1.0 Pro (001)
  description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
  creator_organization_name: Google
  access: limited
  release_date: 2023-12-13
- tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.0-pro-002
+ display_name: Gemini 1.0 Pro (002)
+ description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-04-09
+ tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
  - name: google/gemini-pro-vision
@@ -695,15 +761,79 @@ models:
  creator_organization_name: Google
  access: limited
  release_date: 2023-12-13
- tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+ tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: google/gemini-1.5-pro-001
+ display_name: Gemini 1.5 Pro (001)
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-24
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-flash-001
+ display_name: Gemini 1.5 Flash (001)
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-24
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: google/gemini-1.5-pro-preview-0409
- display_name: Gemini 1.5 Pro
- description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
+ display_name: Gemini 1.5 Pro (0409 preview)
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
  creator_organization_name: Google
  access: limited
  release_date: 2024-04-10
- tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-pro-preview-0514
+ display_name: Gemini 1.5 Pro (0514 preview)
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-14
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-flash-preview-0514
+ display_name: Gemini 1.5 Flash (0514 preview)
+ description: Gemini 1.5 Flash is a smaller Gemini model. It has a 1 million token context window and allows interleaving text, images, audio and video as inputs. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([blog](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-14
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-pro-001-safety-default
+ display_name: Gemini 1.5 Pro (001, default safety)
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-24
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-pro-001-safety-block-none
+ display_name: Gemini 1.5 Pro (001, BLOCK_NONE safety)
+ description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-24
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-flash-001-safety-default
+ display_name: Gemini 1.5 Flash (001, default safety)
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-24
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: google/gemini-1.5-flash-001-safety-block-none
+ display_name: Gemini 1.5 Flash (001, BLOCK_NONE safety)
+ description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2024-05-24
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: google/gemma-2b
  display_name: Gemma (2B)
@@ -742,6 +872,22 @@ models:
  # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
  tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: google/paligemma-3b-mix-224
+ display_name: PaliGemma (3B) Mix 224
+ description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 224x224 input images and 128 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
+ creator_organization_name: Google
+ access: open
+ release_date: 2024-05-12
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: google/paligemma-3b-mix-448
+ display_name: PaliGemma (3B) Mix 448
+ description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 448x448 input images and 512 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
+ creator_organization_name: Google
+ access: open
+ release_date: 2024-05-12
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
  - name: google/text-bison@001
  display_name: PaLM-2 (Bison)
  description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -798,12 +944,35 @@ models:
  release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
  tags: [CODE_MODEL_TAG]

+ - name: google/medlm-medium
+ display_name: MedLM (Medium)
+ description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2023-12-13
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]

+ - name: google/medlm-large
+ display_name: MedLM (Large)
+ description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
+ creator_organization_name: Google
+ access: limited
+ release_date: 2023-12-13
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]

  # HuggingFace
+ - name: HuggingFaceM4/idefics2-8b
+ display_name: IDEFICS 2 (8B)
+ description: IDEFICS 2 (8B parameters) is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. ([blog](https://huggingface.co/blog/idefics2)).
+ creator_organization_name: HuggingFace
+ access: open
+ num_parameters: 8000000000
+ release_date: 2024-04-15
+ tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
  - name: HuggingFaceM4/idefics-9b
  display_name: IDEFICS (9B)
- description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+ description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
  creator_organization_name: HuggingFace
  access: open
  num_parameters: 9000000000
@@ -811,8 +980,8 @@ models:
  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

  - name: HuggingFaceM4/idefics-9b-instruct
- display_name: IDEFICS instruct (9B)
- description: IDEFICS instruct (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+ display_name: IDEFICS-instruct (9B)
+ description: IDEFICS-instruct (9B parameters) is the instruction-tuned version of IDEFICS 9B ([blog](https://huggingface.co/blog/idefics)).
  creator_organization_name: HuggingFace
  access: open
  num_parameters: 9000000000
@@ -821,7 +990,7 @@ models:

  - name: HuggingFaceM4/idefics-80b
  display_name: IDEFICS (80B)
- description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+ description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
  creator_organization_name: HuggingFace
  access: open
  num_parameters: 80000000000
@@ -829,8 +998,8 @@ models:
  tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

  - name: HuggingFaceM4/idefics-80b-instruct
- display_name: IDEFICS instruct (80B)
- description: IDEFICS instruct (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+ display_name: IDEFICS-instruct (80B)
+ description: IDEFICS-instruct (80B parameters) is the instruction-tuned version of IDEFICS 80B ([blog](https://huggingface.co/blog/idefics)).
  creator_organization_name: HuggingFace
  access: open
  num_parameters: 80000000000
@@ -1050,8 +1219,6 @@ models:
  release_date: 2023-06-22
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

-
-

  # Meta
  - name: meta/opt-iml-175b # NOT SUPPORTED
  display_name: OPT-IML (175B)
@@ -1210,6 +1377,44 @@ models:
  release_date: 2024-04-18
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

+ - name: meta/llama-3-8b-chat
+ display_name: Llama 3 Instruct (8B)
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+ creator_organization_name: Meta
+ access: open
+ num_parameters: 8000000000
+ release_date: 2024-04-18
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/llama-3-70b-chat
+ display_name: Llama 3 Instruct (70B)
+ description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+ creator_organization_name: Meta
+ access: open
+ num_parameters: 70000000000
+ release_date: 2024-04-18
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/llama-guard-7b
+ display_name: Llama Guard (7B)
+ description: Llama-Guard is a 7B parameter Llama 2-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories.
+ creator_organization_name: Meta
+ access: open
+ num_parameters: 7000000000
+ release_date: 2023-12-07
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: meta/llama-guard-2-8b
+ display_name: Llama Guard 2 (8B)
+ description: Llama Guard 2 is an 8B parameter Llama 3-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
+ creator_organization_name: Meta
+ access: open
+ num_parameters: 8000000000
+ release_date: 2024-04-18
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+

  # Microsoft/NVIDIA
  - name: microsoft/TNLGv2_530B
@@ -1247,11 +1452,46 @@ models:
  num_parameters: 13000000000
  release_date: 2023-10-05
  tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
+
+ - name: uw-madison/llava-v1.6-vicuna-7b-hf
+ display_name: LLaVA 1.6 (7B)
+ description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+ creator_organization_name: Microsoft
+ access: open
+ num_parameters: 7000000000
+ release_date: 2024-01-01
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+ - name: uw-madison/llava-v1.6-vicuna-13b-hf
+ display_name: LLaVA 1.6 (13B)
+ description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+ creator_organization_name: Microsoft
+ access: open
+ num_parameters: 13000000000
+ release_date: 2024-01-01
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+ - name: uw-madison/llava-v1.6-mistral-7b-hf
+ display_name: LLaVA 1.6 + Mistral (7B)
+ description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+ creator_organization_name: Microsoft
+ access: open
+ num_parameters: 7000000000
+ release_date: 2024-01-01
+ tags: [ VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG ]
+
+ - name: uw-madison/llava-v1.6-34b-hf
+ display_name: LLaVA + Nous-Hermes-2-Yi-34B (34B)
+ description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+ creator_organization_name: Microsoft
+ access: open
+ num_parameters: 34000000000
+ release_date: 2024-01-01
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]

  - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
  display_name: OpenFlamingo (9B)
- description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model. ([paper](https://arxiv.org/abs/2308.01390))
+ description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model ([paper](https://arxiv.org/abs/2308.01390)).
  creator_organization_name: OpenFlamingo
  access: open
  num_parameters: 9000000000
@@ -1267,7 +1507,15 @@ models:
  release_date: 2023-10-05
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

-
+ # KAIST AI
+ - name: kaistai/prometheus-vision-13b-v1.0-hf
+ display_name: LLaVA + Vicuna-v1.5 (13B)
+ description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+ creator_organization_name: KAIST AI
+ access: open
+ num_parameters: 13000000000
+ release_date: 2024-01-01
+ tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]

  # 01.AI
  - name: 01-ai/yi-6b
@@ -1278,6 +1526,7 @@ models:
  num_parameters: 6000000000
  release_date: 2023-11-02
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
  - name: 01-ai/yi-34b
  display_name: Yi (34B)
  description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1287,6 +1536,39 @@ models:
  release_date: 2023-11-02
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+ - name: 01-ai/yi-6b-chat
+ display_name: Yi Chat (6B)
+ description: The Yi models are large language models trained from scratch by developers at 01.AI.
+ creator_organization_name: 01.AI
+ access: open
+ num_parameters: 6000000000
+ release_date: 2023-11-23
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: 01-ai/yi-34b-chat
+ display_name: Yi Chat (34B)
+ description: The Yi models are large language models trained from scratch by developers at 01.AI.
+ creator_organization_name: 01.AI
+ access: open
+ num_parameters: 34000000000
+ release_date: 2023-11-23
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: 01-ai/yi-large
+ display_name: Yi Large
+ description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
+ creator_organization_name: 01.AI
+ access: limited
+ release_date: 2024-05-12
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: 01-ai/yi-large-preview
+ display_name: Yi Large (Preview)
+ description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
+ creator_organization_name: 01.AI
+ access: limited
+ release_date: 2024-05-12
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]

  # Allen Institute for AI
  # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
@@ -1318,29 +1600,64 @@ models:
  # TODO: Add instruct tag.
  tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

+ - name: allenai/olmo-1.7-7b
+ display_name: OLMo 1.7 (7B)
+ description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct versions was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
+ creator_organization_name: Allen Institute for AI
+ access: open
+ num_parameters: 7000000000
+ release_date: 2024-04-17
+ tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]

  # Mistral AI
  - name: mistralai/mistral-7b-v0.1
  display_name: Mistral v0.1 (7B)
- description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
+ description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
+ creator_organization_name: Mistral AI
+ access: open
+ num_parameters: 7300000000
+ release_date: 2023-09-27
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: mistralai/mistral-7b-instruct-v0.1
+ display_name: Mistral Instruct v0.1 (7B)
+ description: Mistral v0.1 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). The instruct version was fined-tuned using publicly available conversation datasets. ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
  creator_organization_name: Mistral AI
  access: open
  num_parameters: 7300000000
  release_date: 2023-09-27
  tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: mistralai/mistral-7b-instruct-v0.2
+ display_name: Mistral Instruct v0.2 (7B)
+ description: Mistral v0.2 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
+ creator_organization_name: Mistral AI
+ access: open
+ num_parameters: 7300000000
+ release_date: 2024-03-23
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: mistralai/mistral-7b-instruct-v0.3
+ display_name: Mistral Instruct v0.3 (7B)
+ description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
+ creator_organization_name: Mistral AI
+ access: open
+ num_parameters: 7300000000
+ release_date: 2024-05-22
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: mistralai/mixtral-8x7b-32kseqlen
  display_name: Mixtral (8x7B 32K seqlen)
- description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
+ description: Mixtral is a mixture-of-experts model that has 46.7B total parameters but only uses 12.9B parameters per token. ([blog post](https://mistral.ai/news/mixtral-of-experts/), [tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
  creator_organization_name: Mistral AI
  access: open
  num_parameters: 46700000000
  release_date: 2023-12-08
- tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]

  - name: mistralai/mixtral-8x7b-instruct-v0.1
- display_name: Mixtral (8x7B Instruct)
- description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following.
+ display_name: Mixtral Instruct (8x7B)
+ description: Mixtral Instruct (8x7B) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following. ([blog post](https://mistral.ai/news/mixtral-of-experts/)).
  creator_organization_name: Mistral AI
  access: open
  num_parameters: 46700000000
@@ -1350,7 +1667,16 @@ models:

  - name: mistralai/mixtral-8x22b
  display_name: Mixtral (8x22B)
- description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1777869263778291896)).
+ description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
+ creator_organization_name: Mistral AI
+ access: open
+ num_parameters: 176000000000
+ release_date: 2024-04-10
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: mistralai/mixtral-8x22b-instruct-v0.1
+ display_name: Mixtral Instruct (8x22B)
+ description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
  creator_organization_name: Mistral AI
  access: open
  num_parameters: 176000000000
@@ -1641,7 +1967,7 @@ models:
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

  - name: openai/gpt-3.5-turbo-0125
- display_name: gpt-3.5-turbo-0125
+ display_name: GPT-3.5 Turbo (0125)
  description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
  creator_organization_name: OpenAI
  access: limited
@@ -1720,9 +2046,26 @@ models:
  release_date: 2024-04-09
  tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+ - name: openai/gpt-4o-2024-05-13
+ display_name: GPT-4o (2024-05-13)
+ description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs.
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2024-04-09
+ tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: openai/gpt-4-vision-preview
- display_name: GPT-4V (preview)
- description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat but works well for traditional completions tasks.
+ # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
+ display_name: GPT-4V (1106 preview)
+ description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+ creator_organization_name: OpenAI
+ access: limited
+ release_date: 2023-11-06
+ tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+ - name: openai/gpt-4-1106-vision-preview
+ display_name: GPT-4V (1106 preview)
+ description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
  creator_organization_name: OpenAI
  access: limited
  release_date: 2023-11-06
@@ -1858,7 +2201,7 @@ models:

  - name: qwen/qwen-7b
  display_name: Qwen
- description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
  creator_organization_name: Qwen
  access: open
  release_date: 2024-02-05
@@ -1866,7 +2209,7 @@ models:

  - name: qwen/qwen1.5-7b
  display_name: Qwen1.5 (7B)
- description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
  creator_organization_name: Qwen
  access: open
  release_date: 2024-02-05
@@ -1874,7 +2217,7 @@ models:

  - name: qwen/qwen1.5-14b
  display_name: Qwen1.5 (14B)
- description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+ description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
  creator_organization_name: Qwen
  access: open
  release_date: 2024-02-05
@@ -1882,23 +2225,71 @@ models:
1882
2225
 
1883
2226
  - name: qwen/qwen1.5-32b
1884
2227
  display_name: Qwen1.5 (32B)
1885
- description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2228
+ description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
1886
2229
  creator_organization_name: Qwen
1887
2230
  access: open
1888
- release_date: 2024-02-05
2231
+ release_date: 2024-04-02
1889
2232
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1890
2233
 
1891
2234
  - name: qwen/qwen1.5-72b
1892
2235
  display_name: Qwen1.5 (72B)
1893
- description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
2236
+ description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
1894
2237
  creator_organization_name: Qwen
1895
2238
  access: open
1896
2239
  release_date: 2024-02-05
1897
2240
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
1898
2241
 
+ - name: qwen/qwen1.5-7b-chat
+ display_name: Qwen1.5 Chat (7B)
+ description: 7B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: qwen/qwen1.5-14b-chat
+ display_name: Qwen1.5 Chat (14B)
+ description: 14B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: qwen/qwen1.5-32b-chat
+ display_name: Qwen1.5 Chat (32B)
+ description: 32B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-04-02
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: qwen/qwen1.5-72b-chat
+ display_name: Qwen1.5 Chat (72B)
+ description: 72B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-02-05
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: qwen/qwen1.5-110b-chat
+ display_name: Qwen1.5 Chat (110B)
+ description: 110B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Alibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 110B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-110b/))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-04-25
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: qwen/qwen2-72b-instruct
+ display_name: Qwen2 Instruct (72B)
+ description: 72B-parameter chat version of the large language model series, Qwen2. Qwen2 uses Group Query Attention (GQA) and has extended context length support up to 128K tokens. ([blog](https://qwenlm.github.io/blog/qwen2/))
+ creator_organization_name: Qwen
+ access: open
+ release_date: 2024-06-07
+ tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  - name: qwen/qwen-vl
  display_name: Qwen-VL
- description: Visual multimodal version of the large model series ([paper](https://arxiv.org/abs/2308.12966)).
+ description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
  creator_organization_name: Alibaba Cloud
  access: open
  release_date: 2023-08-24
@@ -1906,12 +2297,49 @@ models:

  - name: qwen/qwen-vl-chat
  display_name: Qwen-VL Chat
- description: Chat version of the visual multimodal model Qwen ([paper](https://arxiv.org/abs/2308.12966)).
+ description: Chat version of Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
  creator_organization_name: Alibaba Cloud
  access: open
  release_date: 2023-08-24
  tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+ # SAIL (Sea AI Lab)
+ - name: sail/sailor-7b
+ display_name: Sailor (7B)
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+ creator_organization_name: SAIL
+ access: open
+ num_parameters: 7000000000
+ release_date: 2024-04-04
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: sail/sailor-7b-chat
+ display_name: Sailor Chat (7B)
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+ creator_organization_name: SAIL
+ access: open
+ num_parameters: 7000000000
+ release_date: 2024-04-04
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: sail/sailor-14b
+ display_name: Sailor (14B)
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+ creator_organization_name: SAIL
+ access: open
+ num_parameters: 14000000000
+ release_date: 2024-04-04
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: sail/sailor-14b-chat
+ display_name: Sailor Chat (14B)
+ description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+ creator_organization_name: SAIL
+ access: open
+ num_parameters: 14000000000
+ release_date: 2024-04-04
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
  # Salesforce
  - name: salesforce/codegen # NOT SUPPORTED
  display_name: CodeGen (16B)
@@ -1922,6 +2350,34 @@ models:
  release_date: 2022-03-25
  tags: [] # TODO: add tags

+ # SCB10X
+ - name: scb10x/typhoon-v1.5-72b
+ display_name: Typhoon v1.5 (72B)
+ description: Typhoon v1.5 (72B) is a pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+ creator_organization_name: SCB10X
+ access: open
+ num_parameters: 72000000000
+ release_date: 2024-05-08
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ - name: scb10x/typhoon-v1.5-72b-instruct
+ display_name: Typhoon v1.5 Instruct (72B)
+ description: Typhoon v1.5 Instruct (72B) is an instruction-tuned Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+ creator_organization_name: SCB10X
+ access: open
+ num_parameters: 72000000000
+ release_date: 2024-05-08
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+ # Snowflake
+ - name: snowflake/snowflake-arctic-instruct
+ display_name: Arctic Instruct
+ description: Arctic combines a 10B dense transformer model with a residual 128x3.66B MoE MLP, resulting in 480B total and 17B active parameters chosen using top-2 gating.
+ creator_organization_name: Snowflake
+ access: open
+ num_parameters: 482000000000
+ release_date: 2024-04-24
+ tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
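
For readers checking the Arctic entry, a back-of-the-envelope sketch of how the quoted figures fit together (the 10B dense size, 128 experts of 3.66B each, and top-2 routing are taken from the description above; the registry's num_parameters field remains the authoritative count):

    # Rough arithmetic behind "480B total and 17B active parameters"
    dense = 10e9                      # 10B dense transformer
    per_expert = 3.66e9               # size of each MoE MLP expert
    total = dense + 128 * per_expert  # ~478B, i.e. roughly the quoted 480B
    active = dense + 2 * per_expert   # top-2 gating -> ~17.3B active
    print(f"total ~{total / 1e9:.0f}B, active ~{active / 1e9:.1f}B")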


  # Stability AI
@@ -2188,6 +2644,15 @@ models:
  # Does not support echo
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]

+ - name: writer/palmyra-vision-003
+ display_name: Palmyra Vision 003
+ description: Palmyra Vision 003 (internal only)
+ creator_organization_name: Writer
+ access: limited
+ num_parameters: 5000000000
+ release_date: 2024-05-24
+ # Does not support echo
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]


  # Yandex
@@ -2199,3 +2664,65 @@ models:
  num_parameters: 100000000000
  release_date: 2022-06-23
  tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+ # Reka
+ - name: reka/reka-core
+ display_name: Reka-Core
+ description: Reka-Core
+ creator_organization_name: Reka AI
+ access: limited
+ release_date: 2024-04-18
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: reka/reka-core-20240415
+ display_name: Reka-Core-20240415
+ description: Reka-Core-20240415
+ creator_organization_name: Reka AI
+ access: limited
+ release_date: 2024-04-18
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: reka/reka-core-20240501
+ display_name: Reka-Core-20240501
+ description: Reka-Core-20240501
+ creator_organization_name: Reka AI
+ access: limited
+ release_date: 2024-05-01
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: reka/reka-flash
+ display_name: Reka-Flash (21B)
+ description: Reka-Flash (21B)
+ creator_organization_name: Reka AI
+ access: limited
+ num_parameters: 21000000000
+ release_date: 2024-04-18
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: reka/reka-flash-20240226
+ display_name: Reka-Flash-20240226 (21B)
+ description: Reka-Flash-20240226 (21B)
+ creator_organization_name: Reka AI
+ access: limited
+ num_parameters: 21000000000
+ release_date: 2024-04-18
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: reka/reka-edge
+ display_name: Reka-Edge (7B)
+ description: Reka-Edge (7B)
+ creator_organization_name: Reka AI
+ access: limited
+ num_parameters: 7000000000
+ release_date: 2024-04-18
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+ - name: reka/reka-edge-20240208
+ display_name: Reka-Edge-20240208 (7B)
+ description: Reka-Edge-20240208 (7B)
+ creator_organization_name: Reka AI
+ access: limited
+ num_parameters: 7000000000
+ release_date: 2024-04-18
+ tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
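
For anyone wanting to work with these entries programmatically, a minimal sketch of reading the registry back, assuming it is a plain YAML file with a top-level models: list as shown in the hunk headers above (the file path and the load_models helper are illustrative assumptions, not part of the package's public API):

    # Illustrative only: read entries like the ones added in this diff.
    # Assumes the registry is saved locally as model_metadata.yaml with a
    # top-level "models" key; load_models() is a hypothetical helper.
    import yaml

    def load_models(path="model_metadata.yaml"):
        with open(path) as f:
            return yaml.safe_load(f)["models"]

    # List the Reka vision-language models registered above.
    for m in load_models():
        if m["name"].startswith("reka/") and "VISION_LANGUAGE_MODEL_TAG" in m.get("tags", []):
            print(m["name"], m["release_date"])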