crfm-helm 0.5.3__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of crfm-helm might be problematic.
Files changed (60)
  1. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +57 -62
  2. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +53 -55
  3. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
  4. helm/benchmark/annotation/anthropic_red_team_annotator.py +11 -24
  5. helm/benchmark/annotation/call_center_annotator.py +22 -11
  6. helm/benchmark/annotation/harm_bench_annotator.py +11 -24
  7. helm/benchmark/annotation/live_qa_annotator.py +9 -4
  8. helm/benchmark/annotation/medication_qa_annotator.py +9 -4
  9. helm/benchmark/annotation/model_as_judge.py +70 -19
  10. helm/benchmark/annotation/simple_safety_tests_annotator.py +11 -25
  11. helm/benchmark/annotation/xstest_annotator.py +20 -30
  12. helm/benchmark/metrics/safety_metrics.py +39 -17
  13. helm/benchmark/metrics/unitxt_metrics.py +17 -3
  14. helm/benchmark/metrics/vision_language/image_metrics.py +6 -2
  15. helm/benchmark/presentation/create_plots.py +1 -1
  16. helm/benchmark/presentation/schema.py +3 -0
  17. helm/benchmark/presentation/summarize.py +106 -256
  18. helm/benchmark/presentation/test_summarize.py +145 -3
  19. helm/benchmark/run_expander.py +27 -0
  20. helm/benchmark/run_specs/bhasa_run_specs.py +27 -13
  21. helm/benchmark/run_specs/finance_run_specs.py +6 -2
  22. helm/benchmark/run_specs/vlm_run_specs.py +8 -3
  23. helm/benchmark/scenarios/bhasa_scenario.py +226 -82
  24. helm/benchmark/scenarios/raft_scenario.py +1 -1
  25. helm/benchmark/static/schema_bhasa.yaml +10 -10
  26. helm/benchmark/static/schema_legal.yaml +566 -0
  27. helm/benchmark/static/schema_safety.yaml +25 -6
  28. helm/benchmark/static/schema_tables.yaml +26 -2
  29. helm/benchmark/static/schema_vhelm.yaml +42 -11
  30. helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
  31. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  32. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  33. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  34. helm/benchmark/static_build/index.html +1 -1
  35. helm/benchmark/window_services/tokenizer_service.py +0 -5
  36. helm/clients/openai_client.py +16 -1
  37. helm/clients/palmyra_client.py +1 -2
  38. helm/clients/together_client.py +22 -0
  39. helm/common/cache.py +8 -30
  40. helm/common/key_value_store.py +9 -9
  41. helm/common/mongo_key_value_store.py +3 -3
  42. helm/common/test_cache.py +1 -48
  43. helm/common/tokenization_request.py +0 -9
  44. helm/config/model_deployments.yaml +135 -3
  45. helm/config/model_metadata.yaml +134 -6
  46. helm/config/tokenizer_configs.yaml +24 -0
  47. helm/proxy/server.py +0 -9
  48. helm/proxy/services/remote_service.py +0 -6
  49. helm/proxy/services/server_service.py +5 -18
  50. helm/proxy/services/service.py +0 -6
  51. helm/benchmark/data_overlap/__init__.py +0 -0
  52. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  53. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  54. helm/benchmark/data_overlap/light_scenario.py +0 -60
  55. helm/benchmark/static_build/assets/index-58f97dcd.js +0 -10
  56. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  57. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  58. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
  59. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
  60. {crfm_helm-0.5.3.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
helm/clients/together_client.py CHANGED
@@ -7,6 +7,7 @@ import requests
 from retrying import retry

 from helm.common.cache import CacheConfig
+from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
 from helm.clients.client import CachingClient, truncate_sequence, cleanup_str
@@ -323,8 +324,29 @@ class TogetherChatClient(CachingClient):
         self._together_model = together_model

     def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
+        request.validate()
+        messages: List[Dict[str, Any]]
         if request.messages:
             messages = request.messages
+        elif request.multimodal_prompt:
+            message_contents = []
+            for media_object in request.multimodal_prompt.media_objects:
+                if media_object.is_type(IMAGE_TYPE) and media_object.location:
+                    assert media_object.location
+                    if media_object.is_local_file:
+                        from helm.common.images_utils import encode_base64
+
+                        base64_image: str = encode_base64(media_object.location)
+                        image_url = f"data:image/jpeg;base64,{base64_image}"
+                    else:
+                        image_url = media_object.location
+                    message_contents.append({"type": "image_url", "image_url": {"url": image_url}})
+                elif media_object.is_type(TEXT_TYPE):
+                    assert media_object.text
+                    message_contents.append({"type": "text", "text": media_object.text})
+                else:
+                    raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+            messages = [{"role": "user", "content": message_contents}]
         else:
             messages = [{"role": "user", "content": request.prompt}]
         if self._together_model is not None:
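The new elif request.multimodal_prompt branch flattens HELM media objects into Together's OpenAI-style content parts. A minimal sketch of the round trip, assuming the MediaObject/MultimediaObject helpers from helm.common.media_object (field names follow the diff above; the model name is illustrative):

from helm.common.media_object import MediaObject, MultimediaObject
from helm.common.request import Request

request = Request(
    model="meta/llama-3.2-11b-vision-instruct-turbo",  # illustrative deployment
    multimodal_prompt=MultimediaObject(
        media_objects=[
            MediaObject(content_type="image/jpeg", location="https://example.com/cat.jpg"),
            MediaObject(content_type="text/plain", text="What is in this picture?"),
        ]
    ),
)
# convert_to_raw_chat_request(request) should now produce one user message:
# {"role": "user", "content": [
#     {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
#     {"type": "text", "text": "What is in this picture?"}]}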
helm/common/cache.py CHANGED
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Dict, Callable, Generator, Mapping, Optional, Tuple
+from typing import Dict, Callable, Generator, Mapping, Tuple
 import json
 import threading

@@ -38,6 +38,12 @@ class CacheConfig:
 class KeyValueStoreCacheConfig(CacheConfig):
     """Configuration for a cache backed by a key-value store."""

+    # This was originally to distinguish between "primitive" cache configs
+    # and "compound" cache configs. But we don't have any "compound" cache configs currently.
+    # Hypothetical "compound" example: ReadOnlyCacheConfig(SqliteCacheConfig("path"))
+    # TODO: Maybe remove this eventually?
+    pass
+

 @dataclass(frozen=True)
 class SqliteCacheConfig(KeyValueStoreCacheConfig):
@@ -78,24 +84,6 @@ class MongoCacheConfig(KeyValueStoreCacheConfig):
         return f"{self.uri}/{self.collection_name}"


-@dataclass(frozen=True)
-class WithFollowerCacheConfig(CacheConfig):
-    """Configuration of a cache backed by a main cache and a follower cache."""
-
-    # Configuration for the main cache.
-    # Responses will be written to and served out of this cache.
-    main: KeyValueStoreCacheConfig
-
-    # Configuration for the follower cache.
-    # The follower cache is a write-only cache. Responses will be written to this cache,
-    # but not served out of this cache.
-    follower: KeyValueStoreCacheConfig
-
-    @property
-    def cache_stats_key(self) -> str:
-        return self.main.cache_stats_key
-
-
 def get_all_from_sqlite(path: str) -> Generator[Tuple[Dict, Dict], None, None]:
     """Yields all decoded key, value pairs from the SQLite cache.

@@ -126,7 +114,7 @@ def create_key_value_store(config: KeyValueStoreCacheConfig) -> KeyValueStore:
     elif isinstance(config, BlackHoleCacheConfig):
         return BlackHoleKeyValueStore()
     else:
-        raise ValueError(f"KeyValueStoreCacheConfig with unknown type: {config}")
+        raise ValueError(f"CacheConfig with unknown type: {config}")


 @retry
@@ -189,14 +177,8 @@ class Cache(object):

     def __init__(self, config: CacheConfig):
         hlog(f"Created cache with config: {config}")
-        self.config: KeyValueStoreCacheConfig
-        self.follower_config: Optional[KeyValueStoreCacheConfig]
         if isinstance(config, KeyValueStoreCacheConfig):
             self.config = config
-            self.follower_config = None
-        elif isinstance(config, WithFollowerCacheConfig):
-            self.config = config.main
-            self.follower_config = config.follower
         else:
             raise ValueError(f"CacheConfig with unknown type: {config}")

@@ -216,8 +198,4 @@ class Cache(object):
                 response = compute()

             write_to_key_value_store(key_value_store, request, response)
-        if self.follower_config is not None:
-            # TODO: Initialize follower_key_value_store in constructor
-            with create_key_value_store(self.follower_config) as follower_key_value_store:
-                write_to_key_value_store(follower_key_value_store, request, response)
         return response, cached
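With WithFollowerCacheConfig removed, Cache now only accepts a KeyValueStoreCacheConfig, so any mirroring of writes to a secondary store has to happen outside Cache itself. A minimal sketch of the surviving construction path, grounded in the test code below (the cache path is illustrative):

from helm.common.cache import Cache, SqliteCacheConfig

cache = Cache(SqliteCacheConfig("prod_env/cache/together.sqlite"))  # illustrative path
response, cached = cache.get({"name": "request1"}, lambda: {"response": "response1"})
assert response == {"response": "response1"} and not cached  # computed on first call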
helm/common/key_value_store.py CHANGED
@@ -15,11 +15,11 @@ class KeyValueStore(contextlib.AbstractContextManager):
     """Key value store that persists writes."""

     @abstractmethod
-    def contains(self, key: Dict) -> bool:
+    def contains(self, key: Mapping) -> bool:
         pass

     @abstractmethod
-    def get(self, key: Dict) -> Optional[Dict]:
+    def get(self, key: Mapping) -> Optional[Dict]:
         pass

     @abstractmethod
@@ -35,7 +35,7 @@ class KeyValueStore(contextlib.AbstractContextManager):
         pass

     @abstractmethod
-    def remove(self, key: Dict) -> None:
+    def remove(self, key: Mapping) -> None:
         pass


@@ -53,10 +53,10 @@ class SqliteKeyValueStore(KeyValueStore):
     def __exit__(self, exc_type, exc_value, traceback) -> None:
         self._sqlite_dict.__exit__(exc_type, exc_value, traceback)

-    def contains(self, key: Dict) -> bool:
+    def contains(self, key: Mapping) -> bool:
         return request_to_key(key) in self._sqlite_dict

-    def get(self, key: Dict) -> Optional[Dict]:
+    def get(self, key: Mapping) -> Optional[Dict]:
         key_string = request_to_key(key)
         result = self._sqlite_dict.get(key_string)
         if result is not None:
@@ -77,7 +77,7 @@ class SqliteKeyValueStore(KeyValueStore):
         for key, value in pairs:
             self.put(key, value)

-    def remove(self, key: Dict) -> None:
+    def remove(self, key: Mapping) -> None:
         del self._sqlite_dict[key]
         self._sqlite_dict.commit()

@@ -91,10 +91,10 @@ class BlackHoleKeyValueStore(KeyValueStore):
     def __exit__(self, exc_type, exc_value, traceback) -> None:
         pass

-    def contains(self, key: Dict) -> bool:
+    def contains(self, key: Mapping) -> bool:
         return False

-    def get(self, key: Dict) -> Optional[Dict]:
+    def get(self, key: Mapping) -> Optional[Dict]:
         return None

     def get_all(self) -> Generator[Tuple[Dict, Dict], None, None]:
@@ -109,5 +109,5 @@ class BlackHoleKeyValueStore(KeyValueStore):
     def multi_put(self, pairs: Iterable[Tuple[Dict, Dict]]) -> None:
         return None

-    def remove(self, key: Dict) -> None:
+    def remove(self, key: Mapping) -> None:
         return None
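The Dict to Mapping loosening across these stores is purely a type-contract change: keys only need read access, so read-only mappings are now acceptable. A small sketch of why that matters; the request_to_key function here mirrors, but is not, HELM's own serializer:

import json
from types import MappingProxyType
from typing import Mapping

def request_to_key(key: Mapping) -> str:
    # Keys are serialized deterministically for lookup; dict() copies the
    # read-only view because json.dumps only accepts real dicts.
    return json.dumps(dict(key), sort_keys=True)

frozen_key: Mapping = MappingProxyType({"prompt": "hello", "temperature": 0.0})
print(request_to_key(frozen_key))  # {"prompt": "hello", "temperature": 0.0}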
helm/common/mongo_key_value_store.py CHANGED
@@ -39,11 +39,11 @@ class MongoKeyValueStore(KeyValueStore):
         serialized = json.dumps(key, sort_keys=True)
         return json.loads(serialized, object_pairs_hook=SON)

-    def contains(self, key: Dict) -> bool:
+    def contains(self, key: Mapping) -> bool:
         query = {self._REQUEST_KEY: self._canonicalize_key(key)}
         return self._collection.find_one(query) is not None

-    def get(self, key: Dict) -> Optional[Dict]:
+    def get(self, key: Mapping) -> Optional[Dict]:
         query = {self._REQUEST_KEY: self._canonicalize_key(key)}
         document = self._collection.find_one(query)
         if document is not None:
@@ -84,6 +84,6 @@ class MongoKeyValueStore(KeyValueStore):
         # Note: unlike put, multi_put does not support documents with null bytes in keys.
         self._collection.bulk_write(operations)

-    def remove(self, key: Dict) -> None:
+    def remove(self, key: Mapping) -> None:
         query = {self._REQUEST_KEY: self._canonicalize_key(key)}
         self._collection.delete_one(query)
helm/common/test_cache.py CHANGED
@@ -3,9 +3,7 @@ import tempfile
 import unittest
 import threading

-from helm.common.cache import Cache, SqliteCacheConfig, WithFollowerCacheConfig, cache_stats, get_all_from_sqlite
-
-from sqlitedict import SqliteDict
+from helm.common.cache import Cache, SqliteCacheConfig, cache_stats, get_all_from_sqlite


 class TestCache(unittest.TestCase):
@@ -99,51 +97,6 @@ class TestCache(unittest.TestCase):
         assert cache_stats.num_computes[self.cache_path] >= num_items
         assert cache_stats.num_computes[self.cache_path] <= num_items * num_threads

-    def test_follower(self):
-        cache = Cache(SqliteCacheConfig(self.cache_path))
-        request_1 = {"name": "request1"}
-        compute_1 = lambda: {"response": "response1"}
-
-        response, cached = cache.get(request_1, compute_1)
-        assert response == {"response": "response1"}
-        assert not cached
-        assert cache_stats.num_queries[self.cache_path] == 1
-        assert cache_stats.num_computes[self.cache_path] == 1
-
-        follower_cache_file = tempfile.NamedTemporaryFile(delete=False)
-        follower_cache_path = follower_cache_file.name
-        with follower_cache_file:
-            cache_with_follower_config = WithFollowerCacheConfig(
-                main=SqliteCacheConfig(self.cache_path),
-                follower=SqliteCacheConfig(follower_cache_path),
-            )
-            cache_with_follower = Cache(cache_with_follower_config)
-
-            response, cached = cache_with_follower.get(request_1, compute_1)
-            assert response == {"response": "response1"}
-            assert cached
-            assert cache_stats.num_queries[self.cache_path] == 2
-            assert cache_stats.num_computes[self.cache_path] == 1
-            assert cache_stats.num_queries[follower_cache_path] == 0
-            assert cache_stats.num_computes[follower_cache_path] == 0
-
-            request_2 = {"name": "request2"}
-            compute_2 = lambda: {"response": "response2"}
-
-            response, cached = cache_with_follower.get(request_2, compute_2)
-            assert response == {"response": "response2"}
-            assert not cached
-            assert cache_stats.num_queries[self.cache_path] == 3
-            assert cache_stats.num_computes[self.cache_path] == 2
-            assert cache_stats.num_queries[follower_cache_path] == 0
-            assert cache_stats.num_computes[follower_cache_path] == 0
-
-            expected_dict = {
-                '{"name": "request1"}': {"response": "response1"},
-                '{"name": "request2"}': {"response": "response2"},
-            }
-            self.assertCountEqual(SqliteDict(follower_cache_path).items(), expected_dict.items())
-
     def test_get_all_from_sqlite(self):
         cache = Cache(SqliteCacheConfig(self.cache_path))
         num_items = 10  # TODO: Increase to 100
helm/common/tokenization_request.py CHANGED
@@ -2,15 +2,6 @@ from dataclasses import dataclass
 from typing import List, Optional, Union


-@dataclass(frozen=True)
-class WindowServiceInfo:
-    tokenizer_name: str
-    max_sequence_length: int
-    max_request_length: int
-    end_of_text_token: str
-    prefix_token: str
-
-
 @dataclass(frozen=True)
 class TokenizationRequest:
     """A `TokenizationRequest` specifies how to tokenize some text."""
helm/config/model_deployments.yaml CHANGED
@@ -626,6 +626,26 @@ model_deployments:
       args:
        trust_remote_code: true

+  - name: huggingface/llama3-8b-cpt-sea-lionv2-base
+    model_name: aisingapore/llama3-8b-cpt-sea-lionv2-base
+    tokenizer_name: meta/llama-3-8b-instruct
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
+  - name: huggingface/llama3-8b-cpt-sea-lionv2.1-instruct
+    model_name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
+    tokenizer_name: meta/llama-3-8b-instruct
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        torch_dtype: torch.bfloat16
+
   ## Bigcode
   - name: huggingface/santacoder
     model_name: bigcode/santacoder
@@ -1641,6 +1661,21 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.openai_client.OpenAIClient"

+  ## o1 Models
+  - name: openai/o1-preview-2024-09-12
+    model_name: openai/o1-preview-2024-09-12
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAIClient"
+
+  - name: openai/o1-mini-2024-09-12
+    model_name: openai/o1-mini-2024-09-12
+    tokenizer_name: openai/cl100k_base
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.openai_client.OpenAIClient"
+
   ## Text Similarity Models
   # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
   # The number of parameters is guessed based on the number of parameters of the
@@ -1831,7 +1866,25 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.together_client.TogetherClient"
       args:
-        together_model: meta-llama/Meta-Llama-3-8B
+        together_model: meta-llama/Llama-3-8b-hf
+
+  - name: together/llama-3-8b-instruct-turbo
+    model_name: meta/llama-3-8b-instruct-turbo
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-8B-Instruct-Turbo
+
+  - name: together/llama-3-8b-instruct-lite
+    model_name: meta/llama-3-8b-instruct-lite
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-8B-Instruct-Lite

   - name: together/llama-3-70b
     model_name: meta/llama-3-70b
@@ -1842,6 +1895,24 @@ model_deployments:
       args:
         together_model: meta-llama/Meta-Llama-3-70B

+  - name: together/llama-3-70b-instruct-turbo
+    model_name: meta/llama-3-70b-instruct-turbo
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-70B-Instruct-Turbo
+
+  - name: together/llama-3-70b-instruct-lite
+    model_name: meta/llama-3-70b-instruct-lite
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8191
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherClient"
+      args:
+        together_model: meta-llama/Meta-Llama-3-70B-Instruct-Lite
+
   - name: together/llama-3.1-8b-instruct-turbo
     model_name: meta/llama-3.1-8b-instruct-turbo
     tokenizer_name: meta/llama-3.1-8b
@@ -1871,7 +1942,7 @@ model_deployments:

   - name: together/llama-3-8b-chat
     model_name: meta/llama-3-8b-chat
-    tokenizer_name: meta/llama-3-8b
+    tokenizer_name: meta/llama-3-8b-instruct
     max_sequence_length: 8182
     client_spec:
       class_name: "helm.clients.together_client.TogetherChatClient"
@@ -1880,13 +1951,40 @@ model_deployments:

   - name: together/llama-3-70b-chat
     model_name: meta/llama-3-70b-chat
-    tokenizer_name: meta/llama-3-8b
+    tokenizer_name: meta/llama-3-8b-instruct
     max_sequence_length: 8182
     client_spec:
       class_name: "helm.clients.together_client.TogetherChatClient"
       args:
         together_model: meta-llama/Llama-3-70b-chat-hf

+  - name: together/llama-3.2-3b-instruct-turbo
+    model_name: meta/llama-3.2-3b-instruct-turbo
+    tokenizer_name: meta/llama-3.2-3b-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: meta-llama/Llama-3.2-3B-Instruct-Turbo
+
+  - name: together/llama-3.2-11b-vision-instruct-turbo
+    model_name: meta/llama-3.2-11b-vision-instruct-turbo
+    tokenizer_name: meta/llama-3.2-11b-vision-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+
+  - name: together/llama-3.2-90b-vision-instruct-turbo
+    model_name: meta/llama-3.2-90b-vision-instruct-turbo
+    tokenizer_name: meta/llama-3.2-11b-vision-instruct
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+
   - name: together/llama-guard-7b
     model_name: meta/llama-guard-7b
     tokenizer_name: meta-llama/Llama-2-7b-hf
@@ -2262,6 +2360,40 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.vision_language.palmyra_vision_client.PalmyraVisionClient"

+  - name: writer/palmyra-x-004
+    model_name: writer/palmyra-x-004
+    # The actual tokenizer is Llama 2, but it cannot be used in HELM due to this issue:
+    # https://github.com/stanford-crfm/helm/issues/2467
+    # Work around by using the Llama 3 tokenizer for now.
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.palmyra_client.PalmyraChatClient"
+
+  - name: writer/palmyra-med-32k
+    model_name: writer/palmyra-med-32k
+    # Palmyra-Med uses "<|end_of_text|>" as the end-of-text token, which is used by meta/llama-3-8b,
+    # rather than "<|eot_id|>", which is used by meta/llama-3-8b-instruct.
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.palmyra_client.PalmyraChatClient"
+
+  - name: writer/palmyra-med
+    model_name: writer/palmyra-med
+    # Palmyra-Med uses "<|end_of_text|>" as the end-of-text token, which is used by meta/llama-3-8b,
+    # rather than "<|eot_id|>", which is used by meta/llama-3-8b-instruct.
+    tokenizer_name: meta/llama-3-8b
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.palmyra_client.PalmyraChatClient"
+
+  - name: writer/palmyra-fin-32k
+    model_name: writer/palmyra-fin-32k
+    tokenizer_name: meta/llama-3-8b-instruct
+    max_sequence_length: 32000
+    client_spec:
+      class_name: "helm.clients.palmyra_client.PalmyraChatClient"

   # Qwen

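The Palmyra comments above hinge on the two Llama 3 tokenizer variants differing in their end-of-text token. A quick verification sketch, assuming access to the gated HuggingFace checkpoints (this is not part of the package diff):

from transformers import AutoTokenizer

base = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
instruct = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
print(base.eos_token)      # <|end_of_text|>
print(instruct.eos_token)  # <|eot_id|>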
helm/config/model_metadata.yaml CHANGED
@@ -145,6 +145,23 @@ models:
     release_date: 2023-02-24
     tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

+  - name: aisingapore/llama3-8b-cpt-sea-lionv2-base
+    display_name: Llama 3 CPT SEA-Lion v2 (8B)
+    description: Llama 3 CPT SEA-Lion v2 (8B) is a multilingual model which was continued pre-trained on 48B additional tokens, including tokens in Southeast Asian languages.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 8030000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: aisingapore/llama3-8b-cpt-sea-lionv2.1-instruct
+    display_name: Llama 3 CPT SEA-Lion v2.1 Instruct (8B)
+    description: Llama 3 CPT SEA-Lion v2.1 Instruct (8B) is a multilingual model which has been fine-tuned with around 100,000 English instruction-completion pairs alongside a smaller pool of around 50,000 instruction-completion pairs from other Southeast Asian languages, such as Indonesian, Thai and Vietnamese.
+    creator_organization_name: AI Singapore
+    access: open
+    num_parameters: 8030000000
+    release_date: 2024-08-21
+    tags: [TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   # Aleph Alpha
   # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
@@ -1427,6 +1444,24 @@ models:
     num_parameters: 8000000000
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-8b-instruct-turbo
+    display_name: Llama 3 Instruct Turbo (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-8b-instruct-lite
+    display_name: Llama 3 Instruct Lite (8B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation; it leverages a number of optimizations, including INT4 quantization, to provide the most cost-efficient and scalable Llama 3 models available, while maintaining excellent quality relative to full-precision reference implementations. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 8000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3-70b
     display_name: Llama 3 (70B)
@@ -1436,6 +1471,24 @@ models:
     num_parameters: 70000000000
     release_date: 2024-04-18
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+
+  - name: meta/llama-3-70b-instruct-turbo
+    display_name: Llama 3 Instruct Turbo (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Turbo is Together's implementation, providing fast FP8 performance while maintaining quality, closely matching FP16 reference models. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3-70b-instruct-lite
+    display_name: Llama 3 Instruct Lite (70B)
+    description: Llama 3 is a family of language models that have been trained on more than 15 trillion tokens, and use Grouped-Query Attention (GQA) for improved inference scalability. ([paper](https://ai.meta.com/research/publications/the-llama-3-herd-of-models/)) Lite is Together's implementation; it leverages a number of optimizations, including INT4 quantization, to provide the most cost-efficient and scalable Llama 3 models available, while maintaining excellent quality relative to full-precision reference implementations. ([blog](https://www.together.ai/blog/together-inference-engine-2))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 70000000000
+    release_date: 2024-07-18
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3.1-8b-instruct-turbo
     display_name: Llama 3.1 Instruct Turbo (8B)
@@ -1444,7 +1497,7 @@ models:
     access: open
     num_parameters: 8000000000
     release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3.1-70b-instruct-turbo
     display_name: Llama 3.1 Instruct Turbo (70B)
@@ -1453,7 +1506,7 @@ models:
     access: open
     num_parameters: 70000000000
     release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3.1-405b-instruct-turbo
     display_name: Llama 3.1 Instruct Turbo (405B)
@@ -1462,7 +1515,34 @@ models:
     access: open
     num_parameters: 405000000000
     release_date: 2024-07-23
-    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG]
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.2-3b-instruct-turbo
+    display_name: Llama 3.2 Instruct Turbo (3B)
+    description: The Meta Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned text-only generative models in 1B and 3B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 3210000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.2-11b-vision-instruct-turbo
+    display_name: Llama 3.2 Vision Instruct Turbo (11B)
+    description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 10700000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: meta/llama-3.2-90b-vision-instruct-turbo
+    display_name: Llama 3.2 Vision Instruct Turbo (90B)
+    description: The Llama 3.2 Vision collection of multimodal large language models (LLMs) is a collection of pretrained and instruction-tuned image reasoning generative models in 11B and 90B sizes. ([blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)) Turbo is Together's implementation, providing a near negligible difference in quality from the reference implementation with faster performance and lower cost, currently using FP8 quantization. ([blog](https://www.together.ai/blog/llama-31-quality))
+    creator_organization_name: Meta
+    access: open
+    num_parameters: 88600000000
+    release_date: 2024-09-25
+    tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   - name: meta/llama-3-8b-chat
     display_name: Llama 3 Instruct (8B)
@@ -1510,9 +1590,6 @@ models:
     tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]


-
-
-
   # Microsoft/NVIDIA
   - name: microsoft/TNLGv2_530B
     display_name: TNLG v2 (530B)
@@ -2218,6 +2295,23 @@ models:
     release_date: 2023-11-06
     tags: [VISION_LANGUAGE_MODEL_TAG, OPENAI_CHATGPT_MODEL_TAG, FULL_FUNCTIONALITY_VLM_TAG]

+  ## o1 Models
+  - name: openai/o1-preview-2024-09-12
+    display_name: o1-preview (2024-09-12)
+    description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: openai/o1-mini-2024-09-12
+    display_name: o1-mini (2024-09-12)
+    description: o1-mini is a cost-effective reasoning model for applications that require reasoning without broad world knowledge. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/))
+    creator_organization_name: OpenAI
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
   ## Codex Models
   # DEPRECATED: Codex models have been shut down on March 23 2023.

@@ -2928,6 +3022,40 @@ models:
     # Does not support echo
     tags: [VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_VLM_TAG]

+  - name: writer/palmyra-x-004
+    display_name: Palmyra-X-004
+    description: Palmyra-X-004 is a language model with a large context window of up to 128,000 tokens that excels at processing and understanding complex tasks.
+    creator_organization_name: Writer
+    access: limited
+    release_date: 2024-09-12
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-med-32k
+    display_name: Palmyra-Med 32K (70B)
+    description: Palmyra-Med 32K (70B) is a model finetuned from Palmyra-X-003, intended for medical applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-med
+    display_name: Palmyra-Med (70B)
+    description: Palmyra-Med (70B) is a model finetuned from Palmyra-X-003, intended for medical applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]
+
+  - name: writer/palmyra-fin-32k
+    display_name: Palmyra-Fin 32K (70B)
+    description: Palmyra-Fin 32K (70B) is a model finetuned from Palmyra-X-003, intended for financial applications.
+    creator_organization_name: Writer
+    access: open
+    num_parameters: 70600000000
+    release_date: 2024-07-31
+    tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG]

   # Yandex
   - name: yandex/yalm