crfm-helm 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the crfm-helm package, as published to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.

Files changed (56)
  1. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +7 -3
  2. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/RECORD +53 -41
  3. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  4. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  5. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  6. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +1 -1
  7. helm/benchmark/augmentations/perturbation.py +17 -1
  8. helm/benchmark/augmentations/test_perturbation.py +30 -0
  9. helm/benchmark/metrics/efficiency_metrics.py +9 -2
  10. helm/benchmark/metrics/evaluate_reference_metrics.py +16 -0
  11. helm/benchmark/metrics/vision_language/image_metrics.py +142 -17
  12. helm/benchmark/model_metadata_registry.py +5 -1
  13. helm/benchmark/run_expander.py +35 -63
  14. helm/benchmark/run_spec_factory.py +11 -10
  15. helm/benchmark/run_specs/vlm_run_specs.py +294 -38
  16. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  17. helm/benchmark/scenarios/math_scenario.py +1 -1
  18. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  19. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  20. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  21. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  22. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +4 -2
  23. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +1 -1
  24. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +1 -1
  25. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  26. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  27. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  28. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  29. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  30. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  31. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +2 -2
  32. helm/benchmark/scenarios/vision_language/vqa_scenario.py +4 -2
  33. helm/benchmark/static/schema_image2structure.yaml +304 -0
  34. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  35. helm/benchmark/static/schema_vlm.yaml +257 -10
  36. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  37. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  38. helm/benchmark/static_build/index.html +2 -2
  39. helm/clients/anthropic_client.py +36 -6
  40. helm/clients/openai_client.py +2 -3
  41. helm/clients/together_client.py +93 -2
  42. helm/clients/vertexai_client.py +59 -50
  43. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  44. helm/clients/vision_language/huggingface_vlm_client.py +11 -4
  45. helm/clients/vision_language/idefics_client.py +2 -2
  46. helm/common/images_utils.py +10 -3
  47. helm/config/model_deployments.yaml +100 -2
  48. helm/config/model_metadata.yaml +136 -31
  49. helm/config/tokenizer_configs.yaml +7 -0
  50. helm/benchmark/static_build/assets/index-5088afcb.css +0 -1
  51. helm/benchmark/static_build/assets/index-d839df55.js +0 -9
  52. helm/benchmark/test_model_deployment_definition.py +0 -90
  53. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  54. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +0 -0
  55. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  56. {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0

helm/common/images_utils.py

@@ -1,8 +1,9 @@
 import base64
 import io
+
 import requests
 import shutil
-from typing import List, Optional
+from typing import List, Optional, Tuple
 from urllib.request import urlopen
 
 import numpy as np
@@ -28,6 +29,12 @@ def open_image(image_location: str) -> Image.Image:
     return image.convert("RGB")
 
 
+def get_dimensions(image_location: str) -> Tuple[int, int]:
+    """Returns the dimensions of the image."""
+    image: Image.Image = open_image(image_location)
+    return image.size
+
+
 def encode_base64(image_location: str, format="JPEG") -> str:
     """Returns the base64 representation of an image file."""
     image_file = io.BytesIO()
@@ -36,7 +43,7 @@ def encode_base64(image_location: str, format="JPEG") -> str:
     return base64.b64encode(image_file.getvalue()).decode("ascii")
 
 
-def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
+def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None) -> None:
     """
     Copies the image file from `src` path to `dest` path. If dimensions `width` and `height`
     are specified, resizes the image before copying. `src` can be a URL.
@@ -44,7 +51,7 @@ def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
     if (width is not None and height is not None) or is_url(src):
         image = open_image(src)
         if width is not None and height is not None:
-            image = image.resize((width, height), Image.ANTIALIAS)
+            image = image.resize((width, height), Image.Resampling.LANCZOS)
         image.save(dest)
     else:
         shutil.copy(src, dest)
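
The resize change tracks Pillow's API: Image.ANTIALIAS was removed in Pillow 10, and Image.Resampling.LANCZOS (available since Pillow 9.1) is the drop-in replacement. Below is a minimal standalone sketch of the two helpers touched above, assuming Pillow >= 9.1; the helper name copy_image_resized and the file paths are hypothetical, and the URL handling of the real copy_image is omitted.

# Sketch only: mirrors the helpers above, without images_utils' URL support.
from typing import Tuple

from PIL import Image


def get_dimensions(image_location: str) -> Tuple[int, int]:
    """Returns the (width, height) of the image in pixels."""
    with Image.open(image_location) as image:
        return image.size


def copy_image_resized(src: str, dest: str, width: int, height: int) -> None:
    """Copies src to dest, resizing with the LANCZOS filter (formerly ANTIALIAS)."""
    image = Image.open(src).convert("RGB")
    image.resize((width, height), Image.Resampling.LANCZOS).save(dest)


print(get_dimensions("photo.jpg"))  # e.g. (1024, 768); hypothetical input file
copy_image_resized("photo.jpg", "thumb.jpg", 224, 224)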

helm/config/model_deployments.yaml

@@ -436,7 +436,7 @@ model_deployments:
 
   - name: google/gemini-pro-vision
     model_name: google/gemini-pro-vision
-    tokenizer_name: google/gemma-2b # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    tokenizer_name: openai/cl100k_base
     max_sequence_length: 12288
     max_sequence_and_generated_tokens_length: 16384 # Officially max_sequence_length + 4096, in practice max_output_tokens <= 2048 for vision models
     client_spec:
@@ -709,7 +709,35 @@ model_deployments:
     max_sequence_length: 2048
     client_spec:
       class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
-
+
+  - name: huggingface/llava-v1.6-vicuna-7b-hf
+    model_name: uw-madison/llava-v1.6-vicuna-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-vicuna-13b-hf
+    model_name: uw-madison/llava-v1.6-vicuna-13b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-mistral-7b-hf
+    model_name: uw-madison/llava-v1.6-mistral-7b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
+  - name: huggingface/llava-v1.6-34b-hf
+    model_name: uw-madison/llava-v1.6-34b-hf
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vlm_client.HuggingFaceVLMClient"
+
   ## OpenFlamingo
   - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
     model_name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
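
Each deployment above binds a model to a client through client_spec.class_name, which is resolved by dynamic import at runtime. A minimal sketch of that resolution pattern follows (an illustration of the pattern, not HELM's actual loader; create_object here is a hypothetical stand-in):

import importlib
from typing import Any, Dict, Optional


def create_object(class_name: str, args: Optional[Dict[str, Any]] = None) -> Any:
    """Imports `package.module.ClassName` and instantiates it with args."""
    module_name, short_name = class_name.rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), short_name)
    return cls(**(args or {}))

This is why a YAML-only change suffices to register the four LLaVA v1.6 deployments: the client class is simply looked up by its fully qualified name.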
@@ -963,6 +991,15 @@ model_deployments:
       class_name: "helm.benchmark.window_services.image_generation.clip_window_service.CLIPWindowService"
 
   # HuggingFaceM4
+  - name: HuggingFaceM4/idefics2-8b
+    model_name: HuggingFaceM4/idefics2-8b
+    # From https://huggingface.co/docs/transformers/main/en/model_doc/idefics2,
+    # "constructs a IDEFICS2 processor which wraps a LLama tokenizer."
+    tokenizer_name: hf-internal-testing/llama-tokenizer
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.vision_language.huggingface_vision2seq_client.HuggingFaceVision2SeqClient"
+
   - name: HuggingFaceM4/idefics-9b
     model_name: HuggingFaceM4/idefics-9b
     tokenizer_name: HuggingFaceM4/idefics-9b
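
The new HuggingFaceVision2SeqClient (added in this release as huggingface_vision2seq_client.py) matches the transformers Vision2Seq auto class that IDEFICS 2 loads under. A rough sketch of the kind of call such a client wraps, assuming the transformers and Pillow libraries and a hypothetical example.jpg; this is not the HELM client's actual code:

from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
model = AutoModelForVision2Seq.from_pretrained("HuggingFaceM4/idefics2-8b")

# One user turn: an image placeholder followed by a text instruction.
messages = [{"role": "user", "content": [
    {"type": "image"},
    {"type": "text", "text": "Describe this image."},
]}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[Image.open("example.jpg")], return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(processor.batch_decode(outputs, skip_special_tokens=True)[0])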
@@ -1320,6 +1357,15 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.openai_client.OpenAIClient"
 
+  - name: openai/gpt-4-1106-vision-preview
+    model_name: openai/gpt-4-1106-vision-preview
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000 # According to https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
+    max_request_length: 128001
+    max_sequence_and_generated_tokens_length: 132096
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAIClient"
+
   ## Codex Models
   # DEPRECATED: Codex models have been shut down on March 23 2023.
 
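The window sizes in this entry are internally consistent, assuming HELM's usual convention that max_request_length is max_sequence_length plus one token and that the generation budget is 4096 tokens:

# Assumed convention: request length = sequence length + 1.
MAX_SEQUENCE_LENGTH = 128_000
GENERATION_BUDGET = 4_096

assert MAX_SEQUENCE_LENGTH + 1 == 128_001                  # max_request_length
assert MAX_SEQUENCE_LENGTH + GENERATION_BUDGET == 132_096  # max_sequence_and_generated_tokens_length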
@@ -1589,6 +1635,24 @@ model_deployments:
       args:
         together_model: meta-llama/Meta-Llama-3-70B
 
+  - name: together/llama-3-8b-chat
+    model_name: meta/llama-3-8b-chat
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-8B
+
+  - name: together/llama-3-70b-chat
+    model_name: meta/llama-3-70b-chat
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-70B
+
   # 01.AI
   - name: together/yi-6b
     model_name: 01-ai/yi-6b
@@ -1608,6 +1672,24 @@ model_deployments:
       args:
         together_model: zero-one-ai/Yi-34B
 
+  - name: together/yi-6b-chat
+    model_name: 01-ai/yi-6b-chat
+    tokenizer_name: 01-ai/Yi-6B
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-6B
+
+  - name: together/yi-34b-chat
+    model_name: 01-ai/yi-34b-chat
+    tokenizer_name: 01-ai/Yi-6B
+    max_sequence_length: 4095
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: zero-one-ai/Yi-34B
+
 
   # Allen Institute for AI
   - name: together/olmo-7b
@@ -1665,6 +1747,22 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
 
+  - name: together/mixtral-8x22b-instruct-v0.1
+    model_name: mistralai/mixtral-8x22b-instruct-v0.1
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 65535
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+
+
+  ## Snowflake
+  - name: together/snowflake-arctic-instruct
+    model_name: snowflake/snowflake-arctic-instruct
+    tokenizer_name: snowflake/snowflake-arctic-instruct
+    max_sequence_length: 4000 # Lower than 4096 because of chat tokens
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+
   ## Stanford
   - name: together/alpaca-7b
     model_name: stanford/alpaca-7b

helm/config/model_metadata.yaml

@@ -189,7 +189,7 @@ models:
 
   # Anthropic
   - name: anthropic/claude-v1.3
-    display_name: Anthropic Claude v1.3
+    display_name: Claude v1.3
     description: A 52B parameter language model, trained using reinforcement learning from human feedback [paper](https://arxiv.org/pdf/2204.05862.pdf).
     creator_organization_name: Anthropic
     access: limited
@@ -198,7 +198,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-instant-v1
-    display_name: Anthropic Claude Instant V1
+    display_name: Claude Instant V1
     description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
     creator_organization_name: Anthropic
     access: limited
@@ -206,7 +206,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-instant-1.2
-    display_name: Anthropic Claude Instant 1.2
+    display_name: Claude Instant 1.2
     description: A lightweight version of Claude, a model trained using reinforcement learning from human feedback ([docs](https://www.anthropic.com/index/introducing-claude)).
     creator_organization_name: Anthropic
     access: limited
@@ -214,7 +214,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_1_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-2.0
-    display_name: Anthropic Claude 2.0
+    display_name: Claude 2.0
     description: Claude 2.0 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
     creator_organization_name: Anthropic
     access: limited
@@ -222,7 +222,7 @@ models:
     tags: [ANTHROPIC_CLAUDE_2_MODEL_TAG, TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: anthropic/claude-2.1
-    display_name: Anthropic Claude 2.1
+    display_name: Claude 2.1
     description: Claude 2.1 is a general purpose large language model developed by Anthropic. It uses a transformer architecture and is trained via unsupervised learning, RLHF, and Constitutional AI (including both a supervised and Reinforcement Learning (RL) phase). ([model card](https://efficient-manatee.files.svdcdn.com/production/images/Model-Card-Claude-2.pdf))
     creator_organization_name: Anthropic
     access: limited
@@ -231,7 +231,7 @@ models:
 
   - name: anthropic/claude-3-haiku-20240307
     display_name: Claude 3 Haiku (20240307)
-    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
     creator_organization_name: Anthropic
     access: limited
     release_date: 2024-03-13 # https://www.anthropic.com/news/claude-3-haiku
@@ -239,7 +239,7 @@ models:
 
   - name: anthropic/claude-3-sonnet-20240229
     display_name: Claude 3 Sonnet (20240229)
-    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
     creator_organization_name: Anthropic
     access: limited
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
@@ -247,9 +247,9 @@ models:
 
   - name: anthropic/claude-3-opus-20240229
     display_name: Claude 3 Opus (20240229)
-    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI.
-    creator_organization_name: Anthropic
+    description: Claude 3 is a a family of models that possess vision and multilingual capabilities. They were trained with various methods such as unsupervised learning and Constitutional AI ([blog](https://www.anthropic.com/news/claude-3-family)).
     access: limited
+    creator_organization_name: Anthropic
     release_date: 2024-03-04 # https://www.anthropic.com/news/claude-3-family
     tags: [ANTHROPIC_CLAUDE_3_MODEL_TAG, TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
@@ -534,7 +534,7 @@ models:
     access: open
     num_parameters: 132000000000
     release_date: 2024-03-27
-    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
 
   # DeepMind
@@ -559,8 +559,8 @@ models:
 
   # Deepseek
   - name: deepseek-ai/deepseek-llm-67b-chat
-    display_name: DeepSeek Chat (67B)
-    description: DeepSeek Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
+    display_name: DeepSeek LLM Chat (67B)
+    description: DeepSeek LLM Chat is a open-source language model trained on 2 trillion tokens in both English and Chinese, and fine-tuned supervised fine-tuning (SFT) and Direct Preference Optimization (DPO). ([paper](https://arxiv.org/abs/2401.02954))
     creator_organization_name: DeepSeek
     access: open
     num_parameters: 67000000000
@@ -670,7 +670,7 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/gemini-1.0-pro-001
     display_name: Gemini 1.0 Pro
@@ -678,7 +678,7 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   # Note: This is aliased to a snapshot of gemini-pro-vision. When possible, please use a versioned snapshot instead.
   - name: google/gemini-pro-vision
@@ -695,15 +695,15 @@ models:
     creator_organization_name: Google
     access: limited
     release_date: 2023-12-13
-    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, GOOGLE_GEMINI_PRO_VISION_V1_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   - name: google/gemini-1.5-pro-preview-0409
-    display_name: Gemini 1.5 Pro
+    display_name: Gemini 1.5 Pro (0409 preview)
     description: Gemini 1.5 Pro is a multimodal mixture-of-experts model capable of recalling and reasoning over fine-grained information from long contexts. ([paper](https://arxiv.org/abs/2403.05530))
     creator_organization_name: Google
     access: limited
     release_date: 2024-04-10
-    tags: [TEXT_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, GOOGLE_GEMINI_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: google/gemma-2b
     display_name: Gemma (2B)
@@ -801,9 +801,18 @@ models:
 
 
   # HuggingFace
+  - name: HuggingFaceM4/idefics2-8b
+    display_name: IDEFICS 2 (8B)
+    description: IDEFICS 2 (8B parameters) is an open multimodal model that accepts arbitrary sequences of image and text inputs and produces text outputs. ([blog](https://huggingface.co/blog/idefics2)).
+    creator_organization_name: HuggingFace
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-15
+    tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
   - name: HuggingFaceM4/idefics-9b
     display_name: IDEFICS (9B)
-    description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+    description: IDEFICS (9B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 9000000000
@@ -811,8 +820,8 @@ models:
     tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   - name: HuggingFaceM4/idefics-9b-instruct
-    display_name: IDEFICS instruct (9B)
-    description: IDEFICS instruct (9B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+    display_name: IDEFICS-instruct (9B)
+    description: IDEFICS-instruct (9B parameters) is the instruction-tuned version of IDEFICS 9B ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 9000000000
@@ -821,7 +830,7 @@ models:
 
   - name: HuggingFaceM4/idefics-80b
     display_name: IDEFICS (80B)
-    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+    description: IDEFICS (80B parameters) is an open-source model based on DeepMind's Flamingo ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 80000000000
@@ -829,8 +838,8 @@ models:
     tags: [VISION_LANGUAGE_MODEL_TAG, IDEFICS_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
 
   - name: HuggingFaceM4/idefics-80b-instruct
-    display_name: IDEFICS instruct (80B)
-    description: IDEFICS instruct (80B parameters) is an open-source model based on DeepMind's Flamingo. ([blog](https://huggingface.co/blog/idefics))
+    display_name: IDEFICS-instruct (80B)
+    description: IDEFICS-instruct (80B parameters) is the instruction-tuned version of IDEFICS 80B ([blog](https://huggingface.co/blog/idefics)).
     creator_organization_name: HuggingFace
     access: open
     num_parameters: 80000000000
@@ -1210,6 +1219,24 @@ models:
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
 
+  - name: meta/llama-3-8b-chat
+    display_name: Llama 3 Chat (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-70b-chat
+    display_name: Llama 3 Chat (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. It used SFT, rejection sampling, PPO and DPO for post-training.
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-04-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 
   # Microsoft/NVIDIA
   - name: microsoft/TNLGv2_530B
@@ -1247,11 +1274,46 @@ models:
     num_parameters: 13000000000
     release_date: 2023-10-05
     tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
-
+
+  - name: uw-madison/llava-v1.6-vicuna-7b-hf
+    display_name: LLaVA 1.6 (7B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-vicuna-13b-hf
+    display_name: LLaVA 1.6 (13B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 13000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
+
+  - name: uw-madison/llava-v1.6-mistral-7b-hf
+    display_name: LLaVA 1.6 + Mistral (7B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 7000000000
+    release_date: 2024-01-01
+    tags: [ VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG ]
+
+  - name: uw-madison/llava-v1.6-34b-hf
+    display_name: LLaVA + Nous-Hermes-2-Yi-34B (34B)
+    description: LLaVa is an open-source chatbot trained by fine-tuning LlamA/Vicuna on GPT-generated multimodal instruction-following data. ([paper](https://arxiv.org/abs/2304.08485))
+    creator_organization_name: Microsoft
+    access: open
+    num_parameters: 34000000000
+    release_date: 2024-01-01
+    tags: [VISION_LANGUAGE_MODEL_TAG, LLAVA_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]
 
   - name: openflamingo/OpenFlamingo-9B-vitl-mpt7b
     display_name: OpenFlamingo (9B)
-    description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model. ([paper](https://arxiv.org/abs/2308.01390))
+    description: OpenFlamingo is an open source implementation of DeepMind's Flamingo models. This 9B-parameter model uses a CLIP ViT-L/14 vision encoder and MPT-7B language model ([paper](https://arxiv.org/abs/2308.01390)).
     creator_organization_name: OpenFlamingo
     access: open
     num_parameters: 9000000000
@@ -1286,7 +1348,22 @@ models:
     num_parameters: 34000000000
     release_date: 2023-11-02
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
-
+  - name: 01-ai/yi-6b-chat
+    display_name: Yi Chat (6B)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI.
+    creator_organization_name: 01.AI
+    access: open
+    num_parameters: 6000000000
+    release_date: 2023-11-23
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
+  - name: 01-ai/yi-34b-chat
+    display_name: Yi Chat (34B)
+    description: The Yi models are large language models trained from scratch by developers at 01.AI.
+    creator_organization_name: 01.AI
+    access: open
+    num_parameters: 34000000000
+    release_date: 2023-11-23
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG]
 
   # Allen Institute for AI
   # OLMo Blog: https://blog.allenai.org/olmo-open-language-model-87ccfc95f580
@@ -1350,7 +1427,16 @@ models:
 
   - name: mistralai/mixtral-8x22b
     display_name: Mixtral (8x22B)
-    description: Mistral AI's mixture-of-experts model ([tweet](https://twitter.com/MistralAI/status/1777869263778291896)).
+    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
+    creator_organization_name: Mistral AI
+    access: open
+    num_parameters: 176000000000
+    release_date: 2024-04-10
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: mistralai/mixtral-8x22b-instruct-v0.1
+    display_name: Mixtral Instruct (8x22B)
+    description: Mistral AI's mixture-of-experts model that uses 39B active parameters out of 141B ([blog post](https://mistral.ai/news/mixtral-8x22b/)).
     creator_organization_name: Mistral AI
     access: open
     num_parameters: 176000000000
@@ -1721,8 +1807,17 @@ models:
     tags: [TEXT_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
 
   - name: openai/gpt-4-vision-preview
-    display_name: GPT-4V (preview)
-    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat but works well for traditional completions tasks.
+    # According to https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4, this model has pointed gpt-4-1106-vision-preview.
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2023-11-06
+    tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]
+
+  - name: openai/gpt-4-1106-vision-preview
+    display_name: GPT-4V (1106 preview)
+    description: GPT-4V is a large multimodal model that accepts both text and images and is optimized for chat ([model card](https://openai.com/research/gpt-4v-system-card)).
     creator_organization_name: OpenAI
     access: limited
     release_date: 2023-11-06
@@ -1898,7 +1993,7 @@ models:
 
   - name: qwen/qwen-vl
     display_name: Qwen-VL
-    description: Visual multimodal version of the large model series ([paper](https://arxiv.org/abs/2308.12966)).
+    description: Visual multimodal version of the Qwen large language model series ([paper](https://arxiv.org/abs/2308.12966)).
     creator_organization_name: Alibaba Cloud
     access: open
     release_date: 2023-08-24
@@ -1906,7 +2001,7 @@ models:
 
   - name: qwen/qwen-vl-chat
     display_name: Qwen-VL Chat
-    description: Chat version of the visual multimodal model Qwen ([paper](https://arxiv.org/abs/2308.12966)).
+    description: Chat version of Qwen-VL ([paper](https://arxiv.org/abs/2308.12966)).
     creator_organization_name: Alibaba Cloud
     access: open
     release_date: 2023-08-24
@@ -1923,6 +2018,16 @@ models:
     tags: [] # TODO: add tags
 
 
+  # Snowflake
+  - name: snowflake/snowflake-arctic-instruct
+    display_name: Arctic Instruct
+    description: Arctic combines a 10B dense transformer model with a residual 128x3.66B MoE MLP resulting in 480B total and 17B active parameters chosen using a top-2 gating.
+    creator_organization_name: Snowflake
+    access: open
+    num_parameters: 482000000000
+    release_date: 2024-04-24
+    tags: [TEXT_MODEL_TAG, PARTIAL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
 
   # Stability AI
   - name: stabilityai/stablelm-base-alpha-3b
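
The parameter counts in the Arctic description check out arithmetically; with top-2 gating, two of the 128 experts are active per token:

# Verifying the figures quoted in the description above.
DENSE_PARAMS = 10e9
NUM_EXPERTS, EXPERT_PARAMS = 128, 3.66e9

total = DENSE_PARAMS + NUM_EXPERTS * EXPERT_PARAMS  # ~478.5B, quoted as ~480B total
active = DENSE_PARAMS + 2 * EXPERT_PARAMS           # ~17.3B, quoted as ~17B active
print(f"total ~ {total / 1e9:.0f}B, active ~ {active / 1e9:.0f}B")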

helm/config/tokenizer_configs.yaml

@@ -331,6 +331,13 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  # Snowflake
+  - name: snowflake/snowflake-arctic-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|im_end|>"
+    prefix_token: "<|im_start|>"
+
   # Tiiuae
   - name: tiiuae/falcon-7b
     tokenizer_spec:
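
The <|im_start|> and <|im_end|> tokens configured here are ChatML-style delimiters. A sketch of how a chat prompt built from them might look (illustrative only; the authoritative template ships with the model's Hugging Face tokenizer):

def format_turn(role: str, content: str) -> str:
    """Wraps one chat turn in ChatML-style delimiters."""
    return f"<|im_start|>{role}\n{content}<|im_end|>\n"


prompt = format_turn("user", "Summarize HELM in one sentence.") + "<|im_start|>assistant\n"
print(prompt)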