PyPI - crfm-helm - Versions diffs - 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl - Mend

crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (333) hide show

helm/clients/vision_language/paligemma_client.py CHANGED Viewed

@@ -8,7 +8,7 @@ from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
 from helm.common.cache import CacheConfig
 from helm.common.images_utils import open_image
 from helm.common.gpu_utils import get_torch_device_name
-from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.hierarchical_logger import hexception, hlog, htrack_block
 from helm.common.media_object import TEXT_TYPE
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import Request, RequestResult, GeneratedOutput, Token
@@ -126,6 +126,7 @@ class PaliGemmaClient(CachingClient):
                     result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
                     concat_results.append(result)
             except RuntimeError as model_error:
+                hexception(model_error)
                 return RequestResult(success=False, cached=False, error=str(model_error), completions=[], embedding=[])
             for result in concat_results:

helm/clients/vision_language/palmyra_vision_client.py CHANGED Viewed

@@ -5,6 +5,7 @@ import requests
 from helm.common.cache import CacheConfig
 from helm.common.images_utils import encode_base64
+from helm.common.hierarchical_logger import hexception
 from helm.common.media_object import TEXT_TYPE
 from helm.common.request import Request, RequestResult, GeneratedOutput, ErrorFlags
 from helm.common.request import wrap_request_time
@@ -76,6 +77,7 @@ class PalmyraVisionClient(CachingClient):
             )
             result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except PalmyraVisionContentBlockedError as ex:
+            hexception(ex)
             return RequestResult(
                 success=False,
                 cached=False,

helm/clients/vision_language/qwen2_vlm_client.py CHANGED Viewed

@@ -8,7 +8,7 @@ import torch
 from helm.common.cache import CacheConfig
 from helm.common.gpu_utils import get_torch_device_name
-from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.hierarchical_logger import hexception, hlog, htrack_block
 from helm.common.media_object import TEXT_TYPE
 from helm.common.request import Request, RequestResult, GeneratedOutput, Token
 from helm.common.request import wrap_request_time
@@ -157,6 +157,7 @@ class Qwen2VLMClient(CachingClient):
                     )
                     result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
                 except RuntimeError as model_error:
+                    hexception(model_error)
                     return RequestResult(
                         success=False,
                         cached=False,

helm/clients/vision_language/qwen_vlm_client.py CHANGED Viewed

@@ -7,7 +7,7 @@ from transformers.generation import GenerationConfig
 from helm.common.cache import CacheConfig
 from helm.common.gpu_utils import get_torch_device_name
-from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.hierarchical_logger import hexception, hlog, htrack_block
 from helm.common.media_object import TEXT_TYPE
 from helm.common.request import Request, RequestResult, GeneratedOutput, Token
 from helm.common.request import wrap_request_time
@@ -139,6 +139,7 @@ class QwenVLMClient(CachingClient):
                     )
                     result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
                 except RuntimeError as model_error:
+                    hexception(model_error)
                     return RequestResult(
                         success=False, cached=False, error=str(model_error), completions=[], embedding=[]
                     )

helm/clients/writer_client.py CHANGED Viewed

@@ -2,6 +2,7 @@ from typing import Any, Dict, List, Mapping, Optional
 from helm.clients.client import CachingClient
 from helm.common.cache import CacheConfig
+from helm.common.hierarchical_logger import hexception
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
@@ -82,6 +83,7 @@ class WriterClient(CachingClient):
             raw_response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
             chat_completion: ChatCompletion = ChatCompletion.model_validate(raw_response)
         except Exception as error:
+            hexception(error)
             return RequestResult(
                 success=False,
                 cached=False,

helm/common/hierarchical_logger.py CHANGED Viewed

@@ -64,6 +64,16 @@ class HierarchicalLogger(object):
         self.logger.warning(self.indent() + str(x), **kwargs)
         sys.stdout.flush()
+    def error(self, x: Any, **kwargs) -> None:
+        kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
+        self.logger.error(self.indent() + str(x), **kwargs)
+        sys.stdout.flush()
+    def exception(self, x: Any, **kwargs) -> None:
+        kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
+        self.logger.exception(self.indent() + str(x), **kwargs)
+        sys.stdout.flush()
 def format_time(s: float) -> str:
     """Return a nice string representation of `s` seconds."""
@@ -96,6 +106,16 @@ def hwarn(x: Any, **kwargs) -> None:
     singleton.warn(x, **kwargs)
+def herror(x: Any, **kwargs) -> None:
+    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
+    singleton.error(x, **kwargs)
+def hexception(x: Any, **kwargs) -> None:
+    kwargs["stacklevel"] = kwargs.get("stacklevel", 1) + 1
+    singleton.exception(x, **kwargs)
 class htrack_block:
     def __init__(self, x: Any, stacklevel=1) -> None:
         self._stacklevel = stacklevel + 1

helm/common/optional_dependencies.py CHANGED Viewed

@@ -9,7 +9,7 @@ def handle_module_not_found_error(e: ModuleNotFoundError, suggestions: Optional[
     # TODO: Ask user to install more specific optional dependencies
     # e.g. crfm-helm[plots] or crfm-helm[server]
     suggested_commands = " or ".join(
-        [f"`pip install crfm-helm[{suggestion}]`" for suggestion in (suggestions or []) + ["all"]]
+        [f'`pip install "crfm-helm[{suggestion}]"`' for suggestion in (suggestions or []) + ["all"]]
     )
     raise OptionalDependencyNotInstalled(
         f"Optional dependency {e.name} is not installed. Please run {suggested_commands} to install it."

helm/common/test_general.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import shutil
 import os
+import pytest
 from helm.common.general import (
     ensure_file_downloaded,
     format_tags,
@@ -12,6 +15,7 @@ from helm.common.general import (
 def test_ensure_file_downloaded():
+    pytest.skip("Skipping download tests because these downloads are not reliable and may be throttled")
     ensure_file_downloaded("https://ftp.gnu.org/gnu/tar/tar-1.34.tar.gz", "test-tar", unpack=True, unpack_type="untar")
     assert os.path.isdir("test-tar")
     shutil.rmtree("test-tar")

helm/config/model_deployments.yaml CHANGED Viewed

@@ -730,6 +730,13 @@ model_deployments:
         thinking_budget_tokens: 10000
         stream: true
+  - name: anthropic/claude-sonnet-4-5-20250929
+    model_name: anthropic/claude-sonnet-4-5-20250929
+    tokenizer_name: anthropic/claude
+    max_sequence_length: 200000
+    client_spec:
+      class_name: "helm.clients.anthropic_client.AnthropicMessagesClient"
   - name: anthropic/stanford-online-all-v4-s3
     deprecated: true # Closed model, not accessible via API
     model_name: anthropic/stanford-online-all-v4-s3
@@ -861,6 +868,20 @@ model_deployments:
         parse_thinking: true
         disable_logprobs: True
+  - name: together/deepseek-r1-distill-llama-70b
+    model_name: deepseek-ai/deepseek-r1-distill-llama-70b
+    tokenizer_name: deepseek-ai/deepseek-r1-distill-llama-70b
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+  - name: together/deepseek-r1-distill-qwen-14b
+    model_name: deepseek-ai/deepseek-r1-distill-qwen-14b
+    tokenizer_name: deepseek-ai/deepseek-r1-distill-qwen-14b
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
   # Gooseai
   # TODO: Migrate these models to use OpenAIClient
@@ -1088,6 +1109,14 @@ model_deployments:
         # - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations#global-endpoint
         location: global
+  - name: google/gemini-2.5-flash-lite
+    model_name: google/gemini-2.5-flash-lite
+    tokenizer_name: google/gemma-2b  # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
+    max_sequence_length: 1048576  # Source: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
+    # TODO: Max output tokens: 65536
+    client_spec:
+      class_name: "helm.clients.vertexai_client.VertexAIChatClient"
   - name: google/gemini-2.5-flash-preview-04-17
     model_name: google/gemini-2.5-flash-preview-04-17
     tokenizer_name: google/gemma-2b  # Gemini has no tokenizer endpoint, so we approximate by using Gemma's tokenizer.
@@ -2616,6 +2645,27 @@ model_deployments:
     client_spec:
       class_name: "helm.clients.openai_client.OpenAIClient"
+  - name: openai/gpt-5-2025-08-07
+    model_name: openai/gpt-5-2025-08-07
+    tokenizer_name: openai/o200k_base
+    max_sequence_length: 400000
+    client_spec:
+      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
+  - name: openai/gpt-5-mini-2025-08-07
+    model_name: openai/gpt-5-mini-2025-08-07
+    tokenizer_name: openai/o200k_base
+    max_sequence_length: 400000
+    client_spec:
+      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
+  - name: openai/gpt-5-nano-2025-08-07
+    model_name: openai/gpt-5-nano-2025-08-07
+    tokenizer_name: openai/o200k_base
+    max_sequence_length: 400000
+    client_spec:
+      class_name: "helm.clients.openai_responses_client.OpenAIResponseClient"
   - name: openai/whisper-1_gpt-4o-2024-11-20
     model_name: openai/whisper-1_gpt-4o-2024-11-20
     tokenizer_name: openai/o200k_base
@@ -2860,6 +2910,23 @@ model_deployments:
         openai_model_name: o3-pro-2025-06-10
         reasoning_effort: high
+  ## GPT-OSS
+  - name: together/gpt-oss-20b
+    model_name: openai/gpt-oss-20b
+    tokenizer_name: openai/o200k_harmony
+    # Source: https://platform.openai.com/docs/models/gpt-oss-20b
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+  - name: together/gpt-oss-120b
+    model_name: openai/gpt-oss-120b
+    tokenizer_name: openai/o200k_harmony
+    # Source: https://platform.openai.com/docs/models/gpt-oss-120b
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
   ## Text Similarity Models
   # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings
   # The number of parameters is guessed based on the number of parameters of the
@@ -3541,6 +3608,16 @@ model_deployments:
       args:
         together_model: togethercomputer/RedPajama-INCITE-7B-Instruct
+  ## Z.ai
+  - name: together/glm-4.5-air-fp8
+    model_name: zai-org/glm-4.5-air-fp8
+    tokenizer_name: zai-org/glm-4.5-air-fp8
+    max_sequence_length: 131072
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        parse_thinking: true
   - name: thudm/cogview2
     model_name: thudm/cogview2
     tokenizer_name: openai/clip-vit-large-patch14
@@ -3816,7 +3893,25 @@ model_deployments:
       class_name: "helm.clients.together_client.TogetherChatClient"
       args:
         parse_thinking: true
+  - name: together/qwen3-next-80b-a3b-thinking
+    model_name: qwen/qwen3-next-80b-a3b-thinking
+    tokenizer_name: qwen/qwen3-next-80b-a3b-thinking
+    max_sequence_length: 262144
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        parse_thinking: true
+  - name: together/qwen3-235b-a22b-instruct-2507-fp8
+    model_name: qwen/qwen3-235b-a22b-instruct-2507-fp8
+    tokenizer_name: qwen/qwen3-235b-a22b-instruct-2507-fp8
+    max_sequence_length: 262144
+    client_spec:
+      class_name: "helm.clients.together_client.TogetherChatClient"
+      args:
+        together_model: Qwen/Qwen3-235B-A22B-Instruct-2507-tput
   - name: huggingface/qwen2.5-7b-instruct-4bit
     model_name: qwen/qwen2.5-7b-instruct
     tokenizer_name: qwen/qwen2.5-7b-instruct
@@ -4256,6 +4351,201 @@ model_deployments:
         args:
             pretrained_model_name_or_path: deepseek-ai/deepseek-coder-6.7b-instruct
+  # AceGPT-v2
+  - name: huggingface/acegpt-v2-8b-chat
+    model_name: freedomintelligence/acegpt-v2-8b-chat
+    tokenizer_name: freedomintelligence/acegpt-v2-8b-chat
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  - name: huggingface/acegpt-v2-32b-chat
+    model_name: freedomintelligence/acegpt-v2-32b-chat
+    tokenizer_name: freedomintelligence/acegpt-v2-32b-chat
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  - name: huggingface/acegpt-v2-70b-chat
+    model_name: freedomintelligence/acegpt-v2-70b-chat
+    tokenizer_name: freedomintelligence/acegpt-v2-70b-chat
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  # ALLaM
+  - name: huggingface/allam-7b-instruct-preview
+    model_name: allam-ai/allam-7b-instruct-preview
+    tokenizer_name: allam-ai/allam-7b-instruct-preview
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  # SILMA
+  - name: huggingface/silma-9b-instruct-v1.0
+    model_name: silma-ai/silma-9b-instruct-v1.0
+    tokenizer_name: silma-ai/silma-9b-instruct-v1.0
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  # Jais Family
+  #
+  # NOTE: Jais Family models require `transformers<=4.52.3`.
+  # On more recent versions of transformers, one of the following errors might occur:
+  #
+  #   File "/path/to//site-packages/transformers/models/gemma3n/configuration_gemma3n.py", line 31, in <module>
+  #     from timm.data import ImageNetInfo, infer_imagenet_subset
+  # ImportError: cannot import name 'ImageNetInfo' from 'timm.data' (/path/to/site-packages/timm/data/__init__.py)
+  #
+  #   File "/path/to/.cache/huggingface/modules/transformers_modules/inceptionai/jais-family-590m-chat/90ac4769212b4964c6e81e183140224628228365/modeling_jais.py", line 899, in forward
+  #     past_length = past_key_values[0][0].size(-2)
+  # AttributeError: 'NoneType' object has no attribute 'size'
+  - name: huggingface/jais-family-590m-chat
+    model_name: inceptionai/jais-family-590m-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+        revision: 90ac4769212b4964c6e81e183140224628228365
+  - name: huggingface/jais-family-1p3b-chat
+    model_name: inceptionai/jais-family-1p3b-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+        revision: 4b93176e2cb00f369b3bc0a8786e4cf16260c804
+  - name: huggingface/jais-family-2p7b-chat
+    model_name: inceptionai/jais-family-2p7b-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        trust_remote_code: true
+        revision: b2bf5d1bcd969ce868f66fb1ad8c3480289ea6b2
+  - name: huggingface/jais-family-6p7b-chat
+    model_name: inceptionai/jais-family-6p7b-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        trust_remote_code: true
+        revision: 683805efe6126c6536feb4aa23317e70222ac94c
+  - name: huggingface/jais-family-13b-chat
+    model_name: inceptionai/jais-family-13b-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 2048
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        trust_remote_code: true
+        revision: 0ef8b4f80429609890816d912b331d3b95864707
+  - name: huggingface/jais-family-30b-8k-chat
+    model_name: inceptionai/jais-family-30b-8k-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        trust_remote_code: true
+        revision: dab185164dd3b79ec9201d7f4cf878ce91ae7e14
+  - name: huggingface/jais-family-30b-16k-chat
+    model_name: inceptionai/jais-family-30b-16k-chat
+    tokenizer_name: inceptionai/jais-family-590m-chat
+    max_sequence_length: 16384
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+        trust_remote_code: true
+        revision: 369f88eeee4d313155f1b1dca4ebec90f9f9f2a4
+  # Jais Adapter
+  - name: huggingface/jais-adapted-7b-chat
+    model_name: inceptionai/jais-adapted-7b-chat
+    tokenizer_name: inceptionai/jais-adapted-7b-chat
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  - name: huggingface/jais-adapted-13b-chat
+    model_name: inceptionai/jais-adapted-13b-chat
+    tokenizer_name: inceptionai/jais-adapted-7b-chat
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  - name: huggingface/jais-adapted-70b-chat
+    model_name: inceptionai/jais-adapted-70b-chat
+    tokenizer_name: inceptionai/jais-adapted-7b-chat
+    max_sequence_length: 4096
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  - name: huggingface/falcon3-1b-instruct
+    model_name: tiiuae/falcon3-1b-instruct
+    tokenizer_name: tiiuae/falcon3-1b-instruct
+    max_sequence_length: 8192
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+  - name: huggingface/falcon3-3b-instruct
+    model_name: tiiuae/falcon3-3b-instruct
+    tokenizer_name: tiiuae/falcon3-1b-instruct
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+  - name: huggingface/falcon3-7b-instruct
+    model_name: tiiuae/falcon3-7b-instruct
+    tokenizer_name: tiiuae/falcon3-7b-instruct
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
+  - name: huggingface/falcon3-10b-instruct
+    model_name: tiiuae/falcon3-10b-instruct
+    tokenizer_name: tiiuae/falcon3-1b-instruct
+    max_sequence_length: 32768
+    client_spec:
+      class_name: "helm.clients.huggingface_client.HuggingFaceClient"
+      args:
+        device_map: auto
 # IBM WatsonX
   - name: ibm/llama-3.3-70b-instruct
     model_name: meta/llama-3.3-70b-instruct
@@ -4590,3 +4880,12 @@ model_deployments:
       class_name: "helm.clients.huggingface_client.HuggingFaceClient"
       args:
         pretrained_model_name_or_path: nicholasKluge/TeenyTinyLlama-460m
+  - name: openrouter/mistral-medium-3.1
+    model_name: mistralai/mistral-medium-3.1
+    tokenizer_name: mistralai/Mistral-7B-v0.1
+    max_sequence_length: 128000
+    client_spec:
+      class_name: "helm.clients.openrouter_client.OpenRouterClient"
+      args:
+        model_name: mistralai/mistral-medium-3.1

crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

Potentially problematic release.

crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl