crfm-helm 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crfm-helm might be problematic.
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +7 -3
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/RECORD +53 -41
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +1 -1
- helm/benchmark/augmentations/perturbation.py +17 -1
- helm/benchmark/augmentations/test_perturbation.py +30 -0
- helm/benchmark/metrics/efficiency_metrics.py +9 -2
- helm/benchmark/metrics/evaluate_reference_metrics.py +16 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +142 -17
- helm/benchmark/model_metadata_registry.py +5 -1
- helm/benchmark/run_expander.py +35 -63
- helm/benchmark/run_spec_factory.py +11 -10
- helm/benchmark/run_specs/vlm_run_specs.py +294 -38
- helm/benchmark/scenarios/legalbench_scenario.py +6 -2
- helm/benchmark/scenarios/math_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +4 -2
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +4 -2
- helm/benchmark/static/schema_image2structure.yaml +304 -0
- helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
- helm/benchmark/static/schema_vlm.yaml +257 -10
- helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
- helm/benchmark/static_build/assets/index-878a1094.css +1 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/clients/anthropic_client.py +36 -6
- helm/clients/openai_client.py +2 -3
- helm/clients/together_client.py +93 -2
- helm/clients/vertexai_client.py +59 -50
- helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
- helm/clients/vision_language/huggingface_vlm_client.py +11 -4
- helm/clients/vision_language/idefics_client.py +2 -2
- helm/common/images_utils.py +10 -3
- helm/config/model_deployments.yaml +100 -2
- helm/config/model_metadata.yaml +136 -31
- helm/config/tokenizer_configs.yaml +7 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +0 -1
- helm/benchmark/static_build/assets/index-d839df55.js +0 -9
- helm/benchmark/test_model_deployment_definition.py +0 -90
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
helm/common/images_utils.py
CHANGED
@@ -1,8 +1,9 @@
 import base64
 import io
+
 import requests
 import shutil
-from typing import List, Optional
+from typing import List, Optional, Tuple
 from urllib.request import urlopen
 
 import numpy as np
@@ -28,6 +29,12 @@ def open_image(image_location: str) -> Image.Image:
     return image.convert("RGB")
 
 
+def get_dimensions(image_location: str) -> Tuple[int, int]:
+    """Returns the dimensions of the image."""
+    image: Image.Image = open_image(image_location)
+    return image.size
+
+
 def encode_base64(image_location: str, format="JPEG") -> str:
     """Returns the base64 representation of an image file."""
     image_file = io.BytesIO()
@@ -36,7 +43,7 @@ def encode_base64(image_location: str, format="JPEG") -> str:
     return base64.b64encode(image_file.getvalue()).decode("ascii")
 
 
-def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
+def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None) -> None:
     """
     Copies the image file from `src` path to `dest` path. If dimensions `width` and `height`
     are specified, resizes the image before copying. `src` can be a URL.
@@ -44,7 +51,7 @@ def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
     if (width is not None and height is not None) or is_url(src):
         image = open_image(src)
         if width is not None and height is not None:
-            image = image.resize((width, height), Image.
+            image = image.resize((width, height), Image.Resampling.LANCZOS)
         image.save(dest)
     else:
         shutil.copy(src, dest)
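For context, a minimal sketch of how the new get_dimensions helper and the updated resize call might be used (the image paths are hypothetical; Image.Resampling.LANCZOS is Pillow's replacement for the resampling constants deprecated since Pillow 9):

from helm.common.images_utils import copy_image, get_dimensions

# get_dimensions wraps PIL's Image.size, which is (width, height).
width, height = get_dimensions("example.jpg")

# When both dimensions are given, copy_image resizes with Image.Resampling.LANCZOS before saving.
copy_image("example.jpg", "thumbnail.jpg", width=width // 2, height=height // 2)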
helm/config/model_deployments.yaml
CHANGED
@@ -436,7 +436,7 @@ model_deployments:
 
   - name: google/gemini-pro-vision
     model_name: google/gemini-pro-vision
-    tokenizer_name:
+    tokenizer_name: openai/cl100k_base
     max_sequence_length: 12288
     max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models
     client_spec:
@@ -709,7 +709,35 @@ model_deployments:
     max_sequence_length: 2048
     client_spec:
       class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
-
+
+  - name: huggingface/llava-v1.6-vicuna-7b-hf
+    model_name: uw-madison/llava-v1.6-vicuna-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-vicuna-13b-hf
+    model_name: uw-madison/llava-v1.6-vicuna-13b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-mistral-7b-hf
+    model_name: uw-madison/llava-v1.6-mistral-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-34b-hf
+    model_name: uw-madison/llava-v1.6-34b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
   ## OpenFlamingo
   - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
     model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
@@ -963,6 +991,15 @@ model_deployments:
       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
 
   # HuggingFaceM4
+  - name: HuggingFaceM4/idefics2-8b
+    model_name: HuggingFaceM4/idefics2-8b
+    # From https://huggingface.co/docs/transformers/main/en/model_doc/idefics2,
+    # "constructs a IDEFICS2 processor which wraps a LLama tokenizer."
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vision2seq_client.HuggingFaceVision2SeqClient"
+
   - name: HuggingFaceM4/idefics-9b
     model_name: HuggingFaceM4/idefics-9b
     tokenizer_name: HuggingFaceM4/idefics-9b
@@ -1320,6 +1357,15 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.openai_client.OpenAIClient"
 
+  - name: openai/gpt-4-1106-vision-preview
+    model_name: openai/gpt-4-1106-vision-preview
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+    max_request_length: 128001
+    max_sequence_and_generated_tokens_length: 132096
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAIClient"
+
   ## Codex Models
   # DEPRECATED: Codex models have been shut down on March 23 2023.
 
@@ -1589,6 +1635,24 @@ model_deployments:
       args:
         together_model: meta-llama/Meta-Llama-3-70B
 
+  - name: together/llama-3-8b-chat
+    model_name: meta/llama-3-8b-chat
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-8B
+
+  - name: together/llama-3-70b-chat
+    model_name: meta/llama-3-70b-chat
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-70B
+
   # 01.AI
   - name: together/yi-6b
     model_name: 01-ai/yi-6b
@@ -1608,6 +1672,24 @@ model_deployments:
       args:
         together_model: zero-one-ai/Yi-34B
 
+  - name: together/yi-6b-chat
+    model_name: 01-ai/yi-6b-chat
+    tokenizer_name: 01-ai/Yi-6B
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-6B
+
+  - name: together/yi-34b-chat
+    model_name: 01-ai/yi-34b-chat
+    tokenizer_name: 01-ai/Yi-6B
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-34B
+
 
   # Allen Institute for AI
   - name: together/olmo-7b
@@ -1665,6 +1747,22 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
 
+  - name: together/mixtral-8x22b-instruct-v0.1
+    model_name: mistralai/mixtral-8x22b-instruct-v0.1
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 65535
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+
+  ## Snowflake
+  - name: together/snowflake-arctic-instruct
+    model_name: snowflake/snowflake-arctic-instruct
+    tokenizer_name: snowflake/snowflake-arctic-instruct
+    max_sequence_length: 4000 # Lower than 4096 because of chat tokens
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
   ## Stanford
   - name: together/alpaca-7b
     model_name: stanford/alpaca-7b
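Each new deployment above follows the same shape: a deployment name, a model_name that should match an entry in model_metadata.yaml, a tokenizer_name, a max_sequence_length, and a client_spec whose class_name points at the serving client (plus optional args such as together_model). This release also deletes helm/benchmark/test_model_deployment_definition.py; below is a minimal standalone sketch of the kind of consistency check that test performed, assuming PyYAML is installed and paths are relative to a helm checkout:

import importlib

import yaml  # PyYAML

with open("helm/config/model_deployments.yaml") as f:
    deployments = yaml.safe_load(f)["model_deployments"]

for deployment in deployments:
    # A deployment is only usable if the client class named in client_spec resolves.
    module_name, class_name = deployment["client_spec"]["class_name"].rsplit(".", 1)
    getattr(importlib.import_module(module_name), class_name)
    print(f"{deployment['name']}: OK (max_sequence_length={deployment.get('max_sequence_length')})")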
helm/config/model_metadata.yaml
CHANGED
@@ -189,7 +189,7 @@ models:
 
   # Anthropic
   - name: anthropic/claude-v1.3
-    display_name:
+    display_name: Claude v1.3
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
     creator_organization_name: Anthropic
     access: limited
@@ -198,7 +198,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-instant-v1
-    display_name:
+    display_name: Claude Instant V1
     description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
     creator_organization_name: Anthropic
     access: limited
@@ -206,7 +206,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-instant-1.2
-    display_name:
+    display_name: Claude Instant 1.2
     description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
     creator_organization_name: Anthropic
     access: limited
@@ -214,7 +214,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-2.0
-    display_name:
+    display_name: Claude 2.0
     description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
     creator_organization_name: Anthropic
     access: limited
@@ -222,7 +222,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-2.1
-    display_name:
+    display_name: Claude 2.1
    description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
     creator_organization_name: Anthropic
     access: limited
@@ -231,7 +231,7 @@ models:
 
   - name: anthropic/claude-3-haiku-20240307
     display_name: Claude 3 Haiku (20240307)
-    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
     creator_organization_name: Anthropic
     access: limited
     release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
@@ -239,7 +239,7 @@ models:
 
   - name: anthropic/claude-3-sonnet-20240229
     display_name: Claude 3 Sonnet (20240229)
-    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
     creator_organization_name: Anthropic
     access: limited
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
@@ -247,9 +247,9 @@ models:
 
   - name: anthropic/claude-3-opus-20240229
     display_name: Claude 3 Opus (20240229)
-    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
-    creator_organization_name: Anthropic
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
     access: limited
+    creator_organization_name: Anthropic
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
     tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
@@ -534,7 +534,7 @@ models:
     access: open
     num_parameters: 132000000000
     release_date: 2024-03-27
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 
   # DeepMind
@@ -559,8 +559,8 @@ models:
 
   # Deepseek
   - name: deepseek-ai/deepseek-llm-67b-chat
-    display_name: DeepSeek Chat (67B)
-    description: DeepSeek Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
+    display_name: DeepSeek LLM Chat (67B)
+    description: DeepSeek LLM Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
     creator_organization_name: DeepSeek
     access: open
     num_parameters: 67000000000
@@ -670,7 +670,7 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/gemini-1.0-pro-001
     display_name: Gemini 1.0 Pro
@@ -678,7 +678,7 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
   - name: google/gemini-pro-vision
@@ -695,15 +695,15 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
-    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: google/gemini-1.5-pro-preview-0409
-    display_name: Gemini 1.5 Pro
+    display_name: Gemini 1.5 Pro (0409 preview)
     description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
     creator_organization_name: Google
     access: limited
     release_date: 2024-04-10
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/gemma-2b
     display_name: Gemma (2B)
@@ -801,9 +801,18 @@ models:
 
 
   # HuggingFace
+  - name: HuggingFaceM4/idefics2-8b
+    display_name: IDEFICS 2 (8B)
+    description: IDEFICS 2 (8B parameters) is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. ([blog](https://huggingface.co/blog/idefics2)).
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-15
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
   - name: HuggingFaceM4/idefics-9b
     display_name: IDEFICS (9B)
-    description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo
+    description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 9000000000
@@ -811,8 +820,8 @@ models:
     tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   - name: HuggingFaceM4/idefics-9b-instruct
-    display_name: IDEFICS
-    description: IDEFICS
+    display_name: IDEFICS-instruct (9B)
+    description: IDEFICS-instruct (9B parameters) is the instruction-tuned version of IDEFICS 9B ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 9000000000
@@ -821,7 +830,7 @@ models:
 
   - name: HuggingFaceM4/idefics-80b
     display_name: IDEFICS (80B)
-    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo
+    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 80000000000
@@ -829,8 +838,8 @@ models:
     tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   - name: HuggingFaceM4/idefics-80b-instruct
-    display_name: IDEFICS
-    description: IDEFICS
+    display_name: IDEFICS-instruct (80B)
+    description: IDEFICS-instruct (80B parameters) is the instruction-tuned version of IDEFICS 80B ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 80000000000
@@ -1210,6 +1219,24 @@ models:
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: meta/llama-3-8b-chat
+    display_name: Llama 3 Chat (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-70b-chat
+    display_name: Llama 3 Chat (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 
   # Microsoft/NVIDIA
   - name: microsoft/TNLGv2_530B
@@ -1247,11 +1274,46 @@ models:
     num_parameters: 13000000000
     release_date: 2023-10-05
     tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
+
+  - name: uw-madison/llava-v1.6-vicuna-7b-hf
+    display_name: LLaVA 1.6 (7B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-vicuna-13b-hf
+    display_name: LLaVA 1.6 (13B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-mistral-7b-hf
+    display_name: LLaVA 1.6 + Mistral (7B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-01-01
+    tags: [ VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG ]
+
+  - name: uw-madison/llava-v1.6-34b-hf
+    display_name: LLaVA + Nous-Hermes-2-Yi-34B (34B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 34000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
 
   - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
     display_name: OpenFlamingo (9B)
-    description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model
+    description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model ([paper](https://arxiv.org/abs/2308.01390)).
     creator_organization_name: OpenFlamingo
     access: open
     num_parameters: 9000000000
@@ -1286,7 +1348,22 @@ models:
     num_parameters: 34000000000
     release_date: 2023-11-02
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
+  - name: 01-ai/yi-6b-chat
+    display_name: Yi Chat (6B)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI.
+    creator_organization_name: 01.AI
+    access: open
+    num_parameters: 6000000000
+    release_date: 2023-11-23
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+  - name: 01-ai/yi-34b-chat
+    display_name: Yi Chat (34B)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI.
+    creator_organization_name: 01.AI
+    access: open
+    num_parameters: 34000000000
+    release_date: 2023-11-23
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # Allen Institute for AI
   # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
@@ -1350,7 +1427,16 @@ models:
 
   - name: mistralai/mixtral-8x22b
     display_name: Mixtral (8x22B)
-    description: Mistral AI's mixture-of-experts model ([
+    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 176000000000
+    release_date: 2024-04-10
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mixtral-8x22b-instruct-v0.1
+    display_name: Mixtral Instruct (8x22B)
+    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
     creator_organization_name: Mistral AI
     access: open
     num_parameters: 176000000000
@@ -1721,8 +1807,17 @@ models:
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: openai/gpt-4-vision-preview
-
-
+    # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: openai/gpt-4-1106-vision-preview
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-11-06
@@ -1898,7 +1993,7 @@ models:
 
   - name: qwen/qwen-vl
     display_name: Qwen-VL
-    description: Visual multimodal version of the large model series ([paper](https://arxiv.org/abs/2308.12966)).
+    description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
     creator_organization_name: Alibaba Cloud
     access: open
     release_date: 2023-08-24
@@ -1906,7 +2001,7 @@ models:
 
   - name: qwen/qwen-vl-chat
     display_name: Qwen-VL Chat
-    description: Chat version of
+    description: Chat version of Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
     creator_organization_name: Alibaba Cloud
     access: open
     release_date: 2023-08-24
@@ -1923,6 +2018,16 @@ models:
     tags: [] # TODO: add tags
 
 
+  # Snowflake
+  - name: snowflake/snowflake-arctic-instruct
+    display_name: Arctic Instruct
+    description: Arctic combines a 10B dense transformer model with a residual 128x3.66B MoE MLP resulting in 480B total and 17B active parameters chosen using a top-2 gating.
+    creator_organization_name: Snowflake
+    access: open
+    num_parameters: 482000000000
+    release_date: 2024-04-24
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 
   # Stability AI
   - name: stabilityai/stablelm-base-alpha-3b
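Most of the metadata edits above fill in missing display names and descriptions or add tags such as INSTRUCTION_FOLLOWING_MODEL_TAG, which run expanders use when selecting models. A minimal sketch of filtering the registry by tag, reading the YAML directly with PyYAML rather than through helm.benchmark.model_metadata_registry (whose exact API this diff does not show):

import yaml  # PyYAML

with open("helm/config/model_metadata.yaml") as f:
    models = yaml.safe_load(f)["models"]

# Print every model carrying the instruction-following tag after this release.
for model in models:
    if "INSTRUCTION_FOLLOWING_MODEL_TAG" in model.get("tags", []):
        print(model["name"])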
helm/config/tokenizer_configs.yaml
CHANGED
@@ -331,6 +331,13 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  # Snowflake
+  - name: snowflake/snowflake-arctic-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|im_end|>"
+    prefix_token: "<|im_start|>"
+
   # Tiiuae
   - name: tiiuae/falcon-7b
     tokenizer_spec:
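The new Arctic entry delegates tokenization to a Hugging Face tokenizer and declares ChatML-style markers as its prefix and end-of-text tokens. A minimal sketch of what those settings correspond to, assuming the transformers library is installed and the Snowflake/snowflake-arctic-instruct Hugging Face repository is accessible (loading it may require trust_remote_code):

from transformers import AutoTokenizer

# prefix_token ("<|im_start|>") and end_of_text_token ("<|im_end|>") from the config
# above are the ChatML delimiters used in this prompt.
tokenizer = AutoTokenizer.from_pretrained("Snowflake/snowflake-arctic-instruct", trust_remote_code=True)
prompt = "<|im_start|>user\nWhat is HELM?<|im_end|>\n<|im_start|>assistant\n"
print(tokenizer.tokenize(prompt))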