crfm-helm 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)
  1. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +13 -3
  2. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +96 -63
  3. helm/benchmark/adaptation/adapter_spec.py +32 -31
  4. helm/benchmark/annotation/air_bench_annotator.py +64 -0
  5. helm/benchmark/annotation/annotator_factory.py +6 -0
  6. helm/benchmark/annotation/live_qa_annotator.py +84 -0
  7. helm/benchmark/annotation/medication_qa_annotator.py +81 -0
  8. helm/benchmark/augmentations/translate_perturbation.py +1 -0
  9. helm/benchmark/huggingface_registration.py +16 -6
  10. helm/benchmark/metrics/air_bench_metrics.py +56 -0
  11. helm/benchmark/metrics/fin_qa_metrics.py +60 -0
  12. helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
  13. helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
  14. helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
  15. helm/benchmark/metrics/live_qa_metrics.py +23 -0
  16. helm/benchmark/metrics/medication_qa_metrics.py +23 -0
  17. helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
  18. helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
  19. helm/benchmark/metrics/unitxt_metrics.py +20 -10
  20. helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
  21. helm/benchmark/metrics/vision_language/image_metrics.py +29 -71
  22. helm/benchmark/presentation/schema.py +54 -4
  23. helm/benchmark/presentation/test_schema.py +11 -0
  24. helm/benchmark/run.py +16 -2
  25. helm/benchmark/run_expander.py +77 -0
  26. helm/benchmark/run_spec_factory.py +4 -0
  27. helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
  28. helm/benchmark/run_specs/classic_run_specs.py +15 -11
  29. helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
  30. helm/benchmark/run_specs/experimental_run_specs.py +33 -0
  31. helm/benchmark/run_specs/finance_run_specs.py +33 -0
  32. helm/benchmark/run_specs/vlm_run_specs.py +168 -45
  33. helm/benchmark/scenarios/air_bench_scenario.py +50 -0
  34. helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
  35. helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
  36. helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
  37. helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
  38. helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
  39. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
  40. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
  41. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +0 -4
  42. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +4 -2
  43. helm/benchmark/scenarios/vision_language/pairs_scenario.py +6 -5
  44. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
  45. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
  46. helm/benchmark/static/schema_air_bench.yaml +3149 -0
  47. helm/benchmark/static/schema_classic.yaml +3 -59
  48. helm/benchmark/static/schema_finance.yaml +143 -0
  49. helm/benchmark/static/schema_image2structure.yaml +254 -111
  50. helm/benchmark/static/schema_instruction_following.yaml +3 -52
  51. helm/benchmark/static/schema_lite.yaml +3 -61
  52. helm/benchmark/static/schema_medical.yaml +255 -0
  53. helm/benchmark/static/schema_mmlu.yaml +3 -61
  54. helm/benchmark/static/schema_tables.yaml +200 -0
  55. helm/benchmark/static/schema_thai.yaml +223 -0
  56. helm/benchmark/static/schema_unitxt.yaml +3 -61
  57. helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +294 -293
  58. helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
  59. helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
  60. helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
  61. helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
  62. helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
  63. helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
  64. helm/benchmark/static_build/index.html +2 -2
  65. helm/clients/anthropic_client.py +43 -9
  66. helm/clients/auto_client.py +11 -0
  67. helm/clients/client.py +24 -7
  68. helm/clients/cohere_client.py +98 -3
  69. helm/clients/huggingface_client.py +71 -12
  70. helm/clients/openai_client.py +9 -2
  71. helm/clients/reka_client.py +189 -0
  72. helm/clients/test_client.py +3 -3
  73. helm/clients/test_huggingface_client.py +19 -3
  74. helm/clients/test_together_client.py +72 -2
  75. helm/clients/together_client.py +129 -23
  76. helm/clients/vertexai_client.py +62 -18
  77. helm/clients/vision_language/huggingface_vlm_client.py +1 -0
  78. helm/clients/vision_language/paligemma_client.py +146 -0
  79. helm/clients/vision_language/palmyra_vision_client.py +84 -0
  80. helm/clients/yi_client.py +31 -0
  81. helm/common/critique_request.py +10 -1
  82. helm/common/images_utils.py +19 -0
  83. helm/config/model_deployments.yaml +412 -18
  84. helm/config/model_metadata.yaml +447 -25
  85. helm/config/tokenizer_configs.yaml +93 -1
  86. helm/proxy/critique/model_critique_client.py +32 -4
  87. helm/proxy/services/server_service.py +1 -1
  88. helm/tokenizers/auto_tokenizer.py +1 -1
  89. helm/tokenizers/cohere_tokenizer.py +44 -2
  90. helm/tokenizers/huggingface_tokenizer.py +36 -13
  91. helm/tokenizers/test_cohere_tokenizer.py +39 -0
  92. helm/tokenizers/test_huggingface_tokenizer.py +5 -1
  93. helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
  94. helm/benchmark/static_build/assets/index-878a1094.css +0 -1
  95. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
  96. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
  97. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
  98. {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
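The largest single change is to helm/config/model_metadata.yaml (+412 -18 in model_deployments.yaml and +447 -25 in model_metadata.yaml), and the hunks reproduced below appear to come from model_metadata.yaml. For orientation, every added entry follows the same general shape; the sketch below is assembled only from fields that occur in this diff (name, display_name, description, creator_organization_name, access, num_parameters, release_date, tags) and is illustrative rather than an authoritative schema, with the inline comments being editorial notes:

    models:
      - name: mistralai/mistral-7b-instruct-v0.3   # <organization>/<model-id>
        display_name: Mistral Instruct v0.3 (7B)
        description: Short human-readable summary, optionally with Markdown links.
        creator_organization_name: Mistral AI
        access: open                               # entries in this diff use open or limited
        num_parameters: 7300000000                 # omitted on some entries
        release_date: 2024-05-22
        tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]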
@@ -100,6 +100,25 @@ models:
   # - j2-large -> j2-light
 
 
+  # AI Singapore
+  - name: aisingapore/sea-lion-7b
+    display_name: SEA-LION (7B)
+    description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-02-24
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: aisingapore/sea-lion-7b-instruct
+    display_name: SEA-LION Instruct (7B)
+    description: SEA-LION is a collection of language models which has been pretrained and instruct-tuned on languages from the Southeast Asia region. It utilizes the MPT architecture and a custom SEABPETokenizer for tokenization.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-02-24
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
 
   # Aleph Alpha
   # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
@@ -327,6 +346,18 @@ models:
     release_date: 2023-05-09 # ArXiv submission date
     tags: [CODE_MODEL_TAG]
 
+  # BioMistral
+
+  - name: biomistral/biomistral-7b
+    display_name: BioMistral (7B)
+    description: BioMistral 7B is an open-source LLM tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.
+    creator_organization_name: BioMistral
+    access: open
+    num_parameters: 7300000000
+    release_date: 2024-02-15
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+
 
 
   # Cerebras Systems
@@ -418,7 +449,7 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: cohere/command-medium-beta # DEPRECATED
-    display_name: Cohere Command beta (6.1B)
+    display_name: Command beta (6.1B)
     description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization_name: Cohere
     access: limited
@@ -427,7 +458,7 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: cohere/command-xlarge-beta # DEPRECATED
-    display_name: Cohere Command beta (52.4B)
+    display_name: Command beta (52.4B)
     description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization_name: Cohere
     access: limited
@@ -436,7 +467,7 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: cohere/command
-    display_name: Cohere Command
+    display_name: Command
     description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
     creator_organization_name: Cohere
     access: limited
@@ -444,12 +475,30 @@ models:
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: cohere/command-light
-    display_name: Cohere Command Light
+    display_name: Command Light
     description: Command is Cohere’s flagship text generation model. It is trained to follow user commands and to be instantly useful in practical business applications. [docs](https://docs.cohere.com/reference/generate) and [changelog](https://docs.cohere.com/changelog)
     creator_organization_name: Cohere
     access: limited
     release_date: 2023-09-29
-    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: cohere/command-r
+    display_name: Command R
+    description: Command R is a multilingual 35B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
+    creator_organization_name: Cohere
+    access: open
+    num_parameters: 35000000000
+    release_date: 2024-03-11
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: cohere/command-r-plus
+    display_name: Command R Plus
+    description: Command R+ is a multilingual 104B parameter model with a context length of 128K that has been trained with conversational tool use capabilities.
+    creator_organization_name: Cohere
+    access: open
+    num_parameters: 104000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Craiyon
   - name: craiyon/dalle-mini
@@ -624,7 +673,16 @@ models:
     release_date: 2023-02-13
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  # EPFL LLM
 
+  - name: epfl-llm/meditron-7b
+    display_name: Meditron (7B)
+    description: Meditron-7B is a 7 billion parameter model adapted to the medical domain from Llama-2-7B through continued pretraining on a comprehensively curated medical corpus.
+    creator_organization_name: EPFL LLM
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-11-27
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # Google
   - name: google/t5-11b
@@ -673,13 +731,21 @@ models:
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/gemini-1.0-pro-001
-    display_name: Gemini 1.0 Pro
+    display_name: Gemini 1.0 Pro (001)
     description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: google/gemini-1.0-pro-002
+    display_name: Gemini 1.0 Pro (002)
+    description: Gemini 1.0 Pro is a multimodal model able to reason across text, images, video, audio and code. ([paper](https://arxiv.org/abs/2312.11805))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
   - name: google/gemini-pro-vision
     display_name: Gemini Pro Vision
@@ -697,14 +763,78 @@ models:
     release_date: 2023-12-13
     tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: google/gemini-1.5-pro-001
+    display_name: Gemini 1.5 Pro (001)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-001
+    display_name: Gemini 1.5 Flash (001)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/gemini-1.5-pro-preview-0409
     display_name: Gemini 1.5 Pro (0409 preview)
-    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
     creator_organization_name: Google
     access: limited
     release_date: 2024-04-10
     tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: google/gemini-1.5-pro-preview-0514
+    display_name: Gemini 1.5 Pro (0514 preview)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-preview-0514
+    display_name: Gemini 1.5 Flash (0514 preview)
+    description: Gemini 1.5 Flash is a smaller Gemini model. It has a 1 million token context window and allows interleaving text, images, audio and video as inputs. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([blog](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-14
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-pro-001-safety-default
+    display_name: Gemini 1.5 Pro (001, default safety)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-pro-001-safety-block-none
+    display_name: Gemini 1.5 Pro (001, BLOCK_NONE safety)
+    description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-001-safety-default
+    display_name: Gemini 1.5 Flash (001, default safety)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and uses default safety settings. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: google/gemini-1.5-flash-001-safety-block-none
+    display_name: Gemini 1.5 Flash (001, BLOCK_NONE safety)
+    description: Gemini 1.5 Flash is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. This model is accessed through Vertex AI and has all safety thresholds set to `BLOCK_NONE`. ([paper](https://arxiv.org/abs/2403.05530))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2024-05-24
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: google/gemma-2b
     display_name: Gemma (2B)
     # TODO: Fill in Gemma description.
@@ -742,6 +872,22 @@ models:
     # TODO: Add OUTPUT_FORMAT_INSTRUCTIONS_TAG tag
     tags: [TEXT_MODEL_TAG, GOOGLE_GEMMA_INSTRUCT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: google/paligemma-3b-mix-224
+    display_name: PaliGemma (3B) Mix 224
+    description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 224x224 input images and 128 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-05-12
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: google/paligemma-3b-mix-448
+    display_name: PaliGemma (3B) Mix 448
+    description: PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3 and based on open components such as the SigLIP vision model and the Gemma language model. Pre-trained with 448x448 input images and 512 token input/output text sequences. Finetuned on a mixture of downstream academic datasets. ([blog](https://developers.googleblog.com/en/gemma-family-and-toolkit-expansion-io-2024/))
+    creator_organization_name: Google
+    access: open
+    release_date: 2024-05-12
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: google/text-bison@001
     display_name: PaLM-2 (Bison)
     description: The best value PaLM model. PaLM 2 (Pathways Language Model) is a Transformer-based model trained using a mixture of objectives that was evaluated on English and multilingual language, and reasoning tasks. ([report](https://arxiv.org/pdf/2305.10403.pdf))
@@ -798,7 +944,21 @@ models:
     release_date: 2023-06-29 # Source: https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/code-generation#model_versions
     tags: [CODE_MODEL_TAG]
 
+  - name: google/medlm-medium
+    display_name: MedLM (Medium)
+    description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: google/medlm-large
+    display_name: MedLM (Large)
+    description: MedLM is a family of foundation models fine-tuned for the healthcare industry based on Google Research's medically-tuned large language model, Med-PaLM 2. ([documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/medlm/overview))
+    creator_organization_name: Google
+    access: limited
+    release_date: 2023-12-13
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # HuggingFace
   - name: HuggingFaceM4/idefics2-8b
@@ -1059,8 +1219,6 @@ models:
     release_date: 2023-06-22
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
-
-
   # Meta
   - name: meta/opt-iml-175b # NOT SUPPORTED
     display_name: OPT-IML (175B)
@@ -1220,7 +1378,7 @@ models:
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: meta/llama-3-8b-chat
-    display_name: Llama 3 Chat (8B)
+    display_name: Llama 3 Instruct (8B)
     description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
     creator_organization_name: Meta
     access: open
@@ -1229,7 +1387,7 @@ models:
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: meta/llama-3-70b-chat
-    display_name: Llama 3 Chat (70B)
+    display_name: Llama 3 Instruct (70B)
     description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
     creator_organization_name: Meta
     access: open
@@ -1237,6 +1395,26 @@ models:
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: meta/llama-guard-7b
+    display_name: Llama Guard (7B)
+    description: Llama-Guard is a 7B parameter Llama 2-based input-output safeguard model. It can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM it generates text in its output that indicates whether a given prompt or response is safe/unsafe, and if unsafe based on a policy, it also lists the violating subcategories.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 7000000000
+    release_date: 2023-12-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-guard-2-8b
+    display_name: Llama Guard 2 (8B)
+    description: Llama Guard 2 is an 8B parameter Llama 3-based LLM safeguard model. Similar to Llama Guard, it can be used for classifying content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+
+
 
   # Microsoft/NVIDIA
   - name: microsoft/TNLGv2_530B
@@ -1329,7 +1507,15 @@ models:
     release_date: 2023-10-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
-
+  # KAIST AI
+  - name: kaistai/prometheus-vision-13b-v1.0-hf
+    display_name: LLaVA + Vicuna-v1.5 (13B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: KAIST AI
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
 
   # 01.AI
   - name: 01-ai/yi-6b
@@ -1340,6 +1526,7 @@ models:
     num_parameters: 6000000000
     release_date: 2023-11-02
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: 01-ai/yi-34b
     display_name: Yi (34B)
     description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1348,6 +1535,7 @@ models:
     num_parameters: 34000000000
     release_date: 2023-11-02
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: 01-ai/yi-6b-chat
     display_name: Yi Chat (6B)
     description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1356,6 +1544,7 @@ models:
     num_parameters: 6000000000
     release_date: 2023-11-23
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   - name: 01-ai/yi-34b-chat
     display_name: Yi Chat (34B)
     description: The Yi models are large language models trained from scratch by developers at 01.AI.
@@ -1365,6 +1554,22 @@ models:
     release_date: 2023-11-23
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: 01-ai/yi-large
+    display_name: Yi Large
+    description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
+    creator_organization_name: 01.AI
+    access: limited
+    release_date: 2024-05-12
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: 01-ai/yi-large-preview
+    display_name: Yi Large (Preview)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI. ([tweet](https://x.com/01AI_Yi/status/1789894091620458667))
+    creator_organization_name: 01.AI
+    access: limited
+    release_date: 2024-05-12
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
   # Allen Institute for AI
   # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
   - name: allenai/olmo-7b
@@ -1395,29 +1600,64 @@ models:
     # TODO: Add instruct tag.
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: allenai/olmo-1.7-7b
+    display_name: OLMo 1.7 (7B)
+    description: OLMo is a series of Open Language Models trained on the Dolma dataset. The instruct versions was trained on the Tulu SFT mixture and a cleaned version of the UltraFeedback dataset.
+    creator_organization_name: Allen Institute for AI
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-17
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # Mistral AI
   - name: mistralai/mistral-7b-v0.1
     display_name: Mistral v0.1 (7B)
-    description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA).
+    description: Mistral 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 7300000000
+    release_date: 2023-09-27
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: mistralai/mistral-7b-instruct-v0.1
+    display_name: Mistral Instruct v0.1 (7B)
+    description: Mistral v0.1 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA) and Sliding-Window Attention (SWA). The instruct version was fined-tuned using publicly available conversation datasets. ([blog post](https://mistral.ai/news/announcing-mistral-7b/))
     creator_organization_name: Mistral AI
     access: open
     num_parameters: 7300000000
     release_date: 2023-09-27
     tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: mistralai/mistral-7b-instruct-v0.2
+    display_name: Mistral Instruct v0.2 (7B)
+    description: Mistral v0.2 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 7300000000
+    release_date: 2024-03-23
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mistral-7b-instruct-v0.3
+    display_name: Mistral Instruct v0.3 (7B)
+    description: Mistral v0.3 Instruct 7B is a 7.3B parameter transformer model that uses Grouped-Query Attention (GQA). Compared to v0.1, v0.2 has a 32k context window and no Sliding-Window Attention (SWA). ([blog post](https://mistral.ai/news/la-plateforme/))
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 7300000000
+    release_date: 2024-05-22
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: mistralai/mixtral-8x7b-32kseqlen
     display_name: Mixtral (8x7B 32K seqlen)
-    description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
+    description: Mixtral is a mixture-of-experts model that has 46.7B total parameters but only uses 12.9B parameters per token. ([blog post](https://mistral.ai/news/mixtral-of-experts/), [tweet](https://twitter.com/MistralAI/status/1733150512395038967)).
     creator_organization_name: Mistral AI
     access: open
     num_parameters: 46700000000
     release_date: 2023-12-08
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: mistralai/mixtral-8x7b-instruct-v0.1
-    display_name: Mixtral (8x7B Instruct)
-    description: Mixtral (8x7B Instruct) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following.
+    display_name: Mixtral Instruct (8x7B)
+    description: Mixtral Instruct (8x7B) is a version of Mixtral (8x7B) that was optimized through supervised fine-tuning and direct preference optimisation (DPO) for careful instruction following. ([blog post](https://mistral.ai/news/mixtral-of-experts/)).
     creator_organization_name: Mistral AI
     access: open
     num_parameters: 46700000000
@@ -1432,7 +1672,7 @@ models:
     access: open
     num_parameters: 176000000000
     release_date: 2024-04-10
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: mistralai/mixtral-8x22b-instruct-v0.1
     display_name: Mixtral Instruct (8x22B)
@@ -1727,7 +1967,7 @@ models:
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: openai/gpt-3.5-turbo-0125
-    display_name: gpt-3.5-turbo-0125
+    display_name: GPT-3.5 Turbo (0125)
     description: Sibling model of text-davinci-003 that is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2024-01-25.
     creator_organization_name: OpenAI
     access: limited
@@ -1806,6 +2046,14 @@ models:
     release_date: 2024-04-09
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
+  - name: openai/gpt-4o-2024-05-13
+    display_name: GPT-4o (2024-05-13)
+    description: GPT-4o (2024-05-13) is a large multimodal model that accepts as input any combination of text, audio, and image and generates any combination of text, audio, and image outputs.
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-04-09
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: openai/gpt-4-vision-preview
     # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
     display_name: GPT-4V (1106 preview)
@@ -1953,7 +2201,7 @@ models:
 
   - name: qwen/qwen-7b
     display_name: Qwen
-    description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+    description: 7B-parameter version of the large language model series, Qwen (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
     creator_organization_name: Qwen
     access: open
     release_date: 2024-02-05
@@ -1961,7 +2209,7 @@ models:
 
   - name: qwen/qwen1.5-7b
     display_name: Qwen1.5 (7B)
-    description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+    description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
     creator_organization_name: Qwen
     access: open
     release_date: 2024-02-05
@@ -1969,7 +2217,7 @@ models:
 
   - name: qwen/qwen1.5-14b
     display_name: Qwen1.5 (14B)
-    description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+    description: 14B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
     creator_organization_name: Qwen
     access: open
     release_date: 2024-02-05
@@ -1977,20 +2225,68 @@ models:
 
   - name: qwen/qwen1.5-32b
     display_name: Qwen1.5 (32B)
-    description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+    description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
     creator_organization_name: Qwen
     access: open
-    release_date: 2024-02-05
+    release_date: 2024-04-02
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: qwen/qwen1.5-72b
     display_name: Qwen1.5 (72B)
-    description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen-7B is a Transformer-based large language model, which is pretrained on a large volume of data, including web texts, books, codes, etc.
+    description: 72B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
     creator_organization_name: Qwen
     access: open
     release_date: 2024-02-05
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: qwen/qwen1.5-7b-chat
+    display_name: Qwen1.5 Chat (7B)
+    description: 7B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-14b-chat
+    display_name: Qwen1.5 Chat (14B)
+    description: 14B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-32b-chat
+    display_name: Qwen1.5 Chat (32B)
+    description: 32B-parameter version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 32B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-32b/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-04-02
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-72b-chat
+    display_name: Qwen1.5 Chat (72B)
+    description: 72B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. ([blog](https://qwenlm.github.io/blog/qwen1.5/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-02-05
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen1.5-110b-chat
+    display_name: Qwen1.5 Chat (110B)
+    description: 110B-parameter chat version of the large language model series, Qwen 1.5 (abbr. Tongyi Qianwen), proposed by Aibaba Cloud. Qwen is a family of transformer models with SwiGLU activation, RoPE, and multi-head attention. The 110B version also includes grouped query attention (GQA). ([blog](https://qwenlm.github.io/blog/qwen1.5-110b/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-04-25
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: qwen/qwen2-72b-instruct
+    display_name: Qwen2 Instruct (72B)
+    description: 72B-parameter chat version of the large language model series, Qwen2. Qwen2 uses Group Query Attention (GQA) and has extended context length support up to 128K tokens. ([blog](https://qwenlm.github.io/blog/qwen2/))
+    creator_organization_name: Qwen
+    access: open
+    release_date: 2024-06-07
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   - name: qwen/qwen-vl
     display_name: Qwen-VL
     description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
@@ -2007,6 +2303,43 @@ models:
     release_date: 2023-08-24
     tags: [VISION_LANGUAGE_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
+  # SAIL (Sea AI Lab)
+  - name: sail/sailor-7b
+    display_name: Sailor (7B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: sail/sailor-7b-chat
+    display_name: Sailor Chat (7B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: sail/sailor-14b
+    display_name: Sailor (14B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: sail/sailor-14b-chat
+    display_name: Sailor Chat (14B)
+    description: Sailor is a suite of Open Language Models tailored for South-East Asia, focusing on languages such as Indonesian, Thai, Vietnamese, Malay, and Lao. These models were continually pre-trained from Qwen1.5. ([paper](https://arxiv.org/abs/2404.03608))
+    creator_organization_name: SAIL
+    access: open
+    num_parameters: 14000000000
+    release_date: 2024-04-04
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   # Salesforce
   - name: salesforce/codegen # NOT SUPPORTED
     display_name: CodeGen (16B)
@@ -2017,6 +2350,24 @@ models:
     release_date: 2022-03-25
     tags: [] # TODO: add tags
 
+  # SCB10X
+  - name: scb10x/typhoon-v1.5-72b
+    display_name: Typhoon v1.5 (72B)
+    description: Typhoon v1.5 (72B) is pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: scb10x/typhoon-v1.5-72b-instruct
+    display_name: Typhoon v1.5 Instruct (72B)
+    description: Typhoon v1.5 Instruct (72B) is pretrained Thai large language model with 72 billion parameters based on Qwen1.5-72B. ([blog](https://blog.opentyphoon.ai/typhoon-1-5-release-a9364cb8e8d7))
+    creator_organization_name: SCB10X
+    access: open
+    num_parameters: 72000000000
+    release_date: 2024-05-08
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Snowflake
   - name: snowflake/snowflake-arctic-instruct
@@ -2293,6 +2644,15 @@ models:
     # Does not support echo
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: writer/palmyra-vision-003
+    display_name: Palmyra Vision 003
+    description: Palmyra Vision 003 (internal only)
+    creator_organization_name: Writer
+    access: limited
+    num_parameters: 5000000000
+    release_date: 2024-05-24
+    # Does not support echo
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
 
 
   # Yandex
@@ -2304,3 +2664,65 @@ models:
     num_parameters: 100000000000
     release_date: 2022-06-23
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG]
+
+  # Reka
+  - name: reka/reka-core
+    display_name: Reka-Core
+    description: Reka-Core
+    creator_organization_name: Reka AI
+    access: limited
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-core-20240415
+    display_name: Reka-Core-20240415
+    description: Reka-Core-20240415
+    creator_organization_name: Reka AI
+    access: limited
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-core-20240501
+    display_name: Reka-Core-20240501
+    description: Reka-Core-20240501
+    creator_organization_name: Reka AI
+    access: limited
+    release_date: 2024-05-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-flash
+    display_name: Reka-Flash (21B)
+    description: Reka-Flash (21B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 21000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-flash-20240226
+    display_name: Reka-Flash-20240226 (21B)
+    description: Reka-Flash-20240226 (21B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 21000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-edge
+    display_name: Reka-Edge (7B)
+    description: Reka-Edge (7B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 7000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: reka/reka-edge-20240208
+    display_name: Reka-Edge-20240208 (7B)
+    description: Reka-Edge-20240208 (7B)
+    creator_organization_name: Reka AI
+    access: limited
+    num_parameters: 7000000000
+    release_date: 2024-04-18
+    tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+