crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (333)
  1. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +7 -77
  2. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +315 -282
  3. helm/benchmark/adaptation/adapter_spec.py +10 -0
  4. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
  6. helm/benchmark/annotation/aci_bench_annotator.py +11 -22
  7. helm/benchmark/annotation/alrage_annotator.py +90 -0
  8. helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
  9. helm/benchmark/annotation/dischargeme_annotator.py +11 -22
  10. helm/benchmark/annotation/med_dialog_annotator.py +11 -22
  11. helm/benchmark/annotation/medalign_annotator.py +11 -22
  12. helm/benchmark/annotation/medi_qa_annotator.py +11 -22
  13. helm/benchmark/annotation/medication_qa_annotator.py +11 -22
  14. helm/benchmark/annotation/mental_health_annotator.py +11 -22
  15. helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
  16. helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
  17. helm/benchmark/annotation/model_as_judge.py +23 -18
  18. helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
  19. helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
  20. helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
  21. helm/benchmark/metrics/air_bench_metrics.py +3157 -1
  22. helm/benchmark/metrics/alrage_metric.py +35 -0
  23. helm/benchmark/metrics/basic_metrics.py +267 -2
  24. helm/benchmark/metrics/bbq_metrics.py +12 -0
  25. helm/benchmark/metrics/classification_metrics.py +19 -1
  26. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
  27. helm/benchmark/metrics/dry_run_metrics.py +30 -1
  28. helm/benchmark/metrics/efficiency_metrics.py +74 -0
  29. helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
  30. helm/benchmark/metrics/evaluate_reference_metrics.py +311 -0
  31. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
  32. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
  33. helm/benchmark/metrics/ifeval_metrics.py +13 -1
  34. helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
  35. helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
  36. helm/benchmark/metrics/language_modeling_metrics.py +13 -1
  37. helm/benchmark/metrics/live_qa_metrics.py +13 -1
  38. helm/benchmark/metrics/llm_jury_metrics.py +13 -1
  39. helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
  40. helm/benchmark/metrics/medec_metrics.py +25 -2
  41. helm/benchmark/metrics/metric.py +25 -0
  42. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
  43. helm/benchmark/metrics/omni_math_metrics.py +13 -1
  44. helm/benchmark/metrics/safety_metrics.py +13 -1
  45. helm/benchmark/metrics/seahelm_metrics.py +14 -1
  46. helm/benchmark/metrics/summac/model_summac.py +2 -2
  47. helm/benchmark/metrics/summarization_metrics.py +129 -1
  48. helm/benchmark/metrics/toxicity_metrics.py +31 -1
  49. helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
  50. helm/benchmark/metrics/wildbench_metrics.py +21 -1
  51. helm/benchmark/presentation/run_display.py +13 -3
  52. helm/benchmark/presentation/run_entry.py +2 -2
  53. helm/benchmark/presentation/schema.py +5 -22
  54. helm/benchmark/presentation/summarize.py +180 -11
  55. helm/benchmark/presentation/taxonomy_info.py +20 -0
  56. helm/benchmark/run.py +1 -1
  57. helm/benchmark/run_expander.py +4 -0
  58. helm/benchmark/run_specs/arabic_run_specs.py +140 -16
  59. helm/benchmark/run_specs/bluex_run_specs.py +1 -1
  60. helm/benchmark/run_specs/classic_run_specs.py +2 -2
  61. helm/benchmark/run_specs/long_context_run_specs.py +2 -2
  62. helm/benchmark/run_specs/medhelm/__init__.py +0 -0
  63. helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
  64. helm/benchmark/run_specs/medhelm_run_specs.py +362 -52
  65. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
  66. helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
  67. helm/benchmark/scenarios/air_bench_scenario.py +21 -0
  68. helm/benchmark/scenarios/alrage_scenario.py +54 -0
  69. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
  70. helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
  71. helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
  72. helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
  73. helm/benchmark/scenarios/aratrust_scenario.py +19 -0
  74. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
  75. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
  76. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
  77. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
  78. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
  79. helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
  80. helm/benchmark/scenarios/banking77_scenario.py +21 -0
  81. helm/benchmark/scenarios/bbq_scenario.py +15 -0
  82. helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
  83. helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
  84. helm/benchmark/scenarios/bluex_scenario.py +6 -2
  85. helm/benchmark/scenarios/bold_scenario.py +15 -0
  86. helm/benchmark/scenarios/boolq_scenario.py +20 -0
  87. helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
  88. helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
  89. helm/benchmark/scenarios/clear_scenario.py +23 -0
  90. helm/benchmark/scenarios/cleva_scenario.py +479 -0
  91. helm/benchmark/scenarios/code_scenario.py +28 -0
  92. helm/benchmark/scenarios/commonsense_scenario.py +32 -0
  93. helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
  94. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
  95. helm/benchmark/scenarios/copyright_scenario.py +35 -1
  96. helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
  97. helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
  98. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
  99. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
  100. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
  101. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
  102. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
  103. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
  104. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
  105. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
  106. helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
  107. helm/benchmark/scenarios/disinformation_scenario.py +22 -0
  108. helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
  109. helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
  110. helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
  111. helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
  112. helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
  113. helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
  114. helm/benchmark/scenarios/financebench_scenario.py +21 -0
  115. helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
  116. helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
  117. helm/benchmark/scenarios/gpqa_scenario.py +18 -0
  118. helm/benchmark/scenarios/grammar_scenario.py +20 -1
  119. helm/benchmark/scenarios/gsm_scenario.py +21 -0
  120. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
  121. helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
  122. helm/benchmark/scenarios/headqa_scenario.py +22 -0
  123. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
  124. helm/benchmark/scenarios/ice_scenario.py +21 -1
  125. helm/benchmark/scenarios/ifeval_scenario.py +18 -0
  126. helm/benchmark/scenarios/imdb_scenario.py +15 -0
  127. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
  128. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
  129. helm/benchmark/scenarios/koala_scenario.py +21 -1
  130. helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
  131. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
  132. helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
  133. helm/benchmark/scenarios/legal_support_scenario.py +13 -0
  134. helm/benchmark/scenarios/legalbench_scenario.py +19 -0
  135. helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
  136. helm/benchmark/scenarios/lextreme_scenario.py +11 -0
  137. helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
  138. helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
  139. helm/benchmark/scenarios/math_scenario.py +33 -0
  140. helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
  141. helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
  142. helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
  143. helm/benchmark/scenarios/med_qa_scenario.py +20 -0
  144. helm/benchmark/scenarios/medalign_scenario.py +23 -0
  145. helm/benchmark/scenarios/medbullets_scenario.py +22 -0
  146. helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
  147. helm/benchmark/scenarios/medec_scenario.py +23 -0
  148. helm/benchmark/scenarios/medhallu_scenario.py +23 -0
  149. helm/benchmark/scenarios/medhelm/__init__.py +0 -0
  150. helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
  151. helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
  152. helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
  153. helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
  154. helm/benchmark/scenarios/mental_health_scenario.py +23 -0
  155. helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
  156. helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
  157. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
  158. helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
  159. helm/benchmark/scenarios/mmlu_scenario.py +21 -0
  160. helm/benchmark/scenarios/msmarco_scenario.py +30 -0
  161. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
  162. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
  163. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
  164. helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
  165. helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
  166. helm/benchmark/scenarios/omni_math_scenario.py +18 -0
  167. helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
  168. helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
  169. helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
  170. helm/benchmark/scenarios/quac_scenario.py +14 -0
  171. helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
  172. helm/benchmark/scenarios/raft_scenario.py +15 -0
  173. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
  174. helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
  175. helm/benchmark/scenarios/scenario.py +31 -0
  176. helm/benchmark/scenarios/seahelm_scenario.py +348 -0
  177. helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
  178. helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
  179. helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
  180. helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
  181. helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
  182. helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
  183. helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
  184. helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
  185. helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
  186. helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
  187. helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
  188. helm/benchmark/scenarios/situation_prompts.yaml +49 -0
  189. helm/benchmark/scenarios/spider_scenario.py +18 -0
  190. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
  191. helm/benchmark/scenarios/summarization_scenario.py +37 -0
  192. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
  193. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
  194. helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
  195. helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
  196. helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
  197. helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
  198. helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
  199. helm/benchmark/scenarios/the_pile_scenario.py +13 -1
  200. helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
  201. helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
  202. helm/benchmark/scenarios/vicuna_scenario.py +21 -1
  203. helm/benchmark/scenarios/wikifact_scenario.py +20 -0
  204. helm/benchmark/scenarios/wildbench_scenario.py +18 -0
  205. helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
  206. helm/benchmark/static/schema_arabic.yaml +55 -12
  207. helm/benchmark/static/schema_long_context.yaml +11 -30
  208. helm/benchmark/static/schema_medhelm.yaml +36 -0
  209. helm/benchmark/static/schema_slp.yaml +219 -0
  210. helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
  211. helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
  212. helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
  213. helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
  214. helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
  215. helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
  216. helm/benchmark/static_build/index.html +5 -6
  217. helm/clients/ai21_client.py +2 -0
  218. helm/clients/aleph_alpha_client.py +2 -0
  219. helm/clients/anthropic_client.py +7 -1
  220. helm/clients/audio_language/diva_llama_client.py +2 -0
  221. helm/clients/audio_language/llama_omni/arguments.py +61 -0
  222. helm/clients/audio_language/llama_omni/constants.py +9 -0
  223. helm/clients/audio_language/llama_omni/conversation.py +213 -0
  224. helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
  225. helm/clients/audio_language/llama_omni/model/builder.py +88 -0
  226. helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
  227. helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
  228. helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
  229. helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
  230. helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
  231. helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
  232. helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
  233. helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
  234. helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
  235. helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
  236. helm/clients/audio_language/llama_omni/preprocess.py +295 -0
  237. helm/clients/audio_language/llama_omni/utils.py +202 -0
  238. helm/clients/audio_language/llama_omni_client.py +2 -1
  239. helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
  240. helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
  241. helm/clients/audio_language/qwen_audiolm_client.py +2 -1
  242. helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
  243. helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
  244. helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
  245. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
  246. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
  247. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
  248. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
  249. helm/clients/bedrock_client.py +2 -0
  250. helm/clients/cohere_client.py +3 -0
  251. helm/clients/google_client.py +2 -0
  252. helm/clients/http_model_client.py +2 -0
  253. helm/clients/huggingface_client.py +2 -1
  254. helm/clients/ibm_client.py +3 -1
  255. helm/clients/image_generation/adobe_vision_client.py +2 -0
  256. helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
  257. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
  258. helm/clients/image_generation/cogview2_client.py +2 -1
  259. helm/clients/image_generation/dalle2_client.py +2 -0
  260. helm/clients/image_generation/dalle_mini_client.py +2 -1
  261. helm/clients/image_generation/deep_floyd_client.py +2 -0
  262. helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
  263. helm/clients/image_generation/lexica_client.py +2 -0
  264. helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
  265. helm/clients/image_generation/mindalle_client.py +2 -1
  266. helm/clients/image_generation/together_image_generation_client.py +2 -0
  267. helm/clients/megatron_client.py +2 -0
  268. helm/clients/mistral_client.py +2 -0
  269. helm/clients/moderation_api_client.py +2 -0
  270. helm/clients/openai_client.py +36 -20
  271. helm/clients/openai_responses_client.py +27 -3
  272. helm/clients/openrouter_client.py +31 -0
  273. helm/clients/palmyra_client.py +2 -1
  274. helm/clients/reka_client.py +2 -1
  275. helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
  276. helm/clients/stanfordhealthcare_http_model_client.py +2 -0
  277. helm/clients/test_openrouter_client.py +69 -0
  278. helm/clients/together_client.py +52 -11
  279. helm/clients/vertexai_client.py +12 -2
  280. helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
  281. helm/clients/vision_language/huggingface_vlm_client.py +2 -0
  282. helm/clients/vision_language/idefics_client.py +2 -1
  283. helm/clients/vision_language/open_flamingo_client.py +2 -1
  284. helm/clients/vision_language/paligemma_client.py +2 -1
  285. helm/clients/vision_language/palmyra_vision_client.py +2 -0
  286. helm/clients/vision_language/qwen2_vlm_client.py +2 -1
  287. helm/clients/vision_language/qwen_vlm_client.py +2 -1
  288. helm/clients/writer_client.py +2 -0
  289. helm/common/hierarchical_logger.py +20 -0
  290. helm/common/optional_dependencies.py +1 -1
  291. helm/common/test_general.py +4 -0
  292. helm/config/model_deployments.yaml +300 -1
  293. helm/config/model_metadata.yaml +302 -9
  294. helm/config/tokenizer_configs.yaml +92 -4
  295. helm/proxy/example_queries.py +8 -8
  296. helm/proxy/server.py +2 -1
  297. helm/proxy/static/index.css +4 -0
  298. helm/proxy/static/index.js +7 -1
  299. helm/benchmark/metrics/aci_bench_metrics.py +0 -14
  300. helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
  301. helm/benchmark/metrics/dischargeme_metrics.py +0 -14
  302. helm/benchmark/metrics/med_dialog_metrics.py +0 -14
  303. helm/benchmark/metrics/medalign_metrics.py +0 -14
  304. helm/benchmark/metrics/medi_qa_metrics.py +0 -14
  305. helm/benchmark/metrics/medication_qa_metrics.py +0 -14
  306. helm/benchmark/metrics/mental_health_metrics.py +0 -14
  307. helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
  308. helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
  309. helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
  310. helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
  311. helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
  312. helm/benchmark/static_build/assets/index-b9779128.css +0 -1
  313. helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
  314. helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
  315. helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
  316. helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
  317. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
  318. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
  319. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
  320. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
  321. /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
  322. /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
  323. /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
  324. /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
  325. /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
  326. /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
  327. /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
  328. /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
  329. /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
  330. /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
  331. /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
  332. /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
  333. /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0

helm/clients/image_generation/together_image_generation_client.py
@@ -4,6 +4,7 @@ import requests
 
 from helm.common.cache import CacheConfig, Cache
 from helm.common.file_caches.file_cache import FileCache
+from helm.common.hierarchical_logger import hexception
 from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
 from helm.common.tokenization_request import (
     TokenizationRequest,
@@ -84,6 +85,7 @@ class TogetherImageGenerationClient(Client):
 
             response, cached = self._cache.get(cache_key, wrap_request_time(do_it))
         except RuntimeError as e:
+            hexception(e)
             error: str = f"TogetherVisionClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
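
The hexception calls added throughout this release share one pattern: log the caught exception through HELM's hierarchical logger (hexception is new in helm/common/hierarchical_logger.py, per the file list above) before converting it into a failed RequestResult. A minimal sketch of the pattern, with a hypothetical _call_api method and an illustrative cache key standing in for the client-specific details:

    from helm.common.hierarchical_logger import hexception
    from helm.common.request import RequestResult, wrap_request_time

    def make_request(self, request):
        cache_key = {"prompt": request.prompt}  # illustrative cache key
        try:
            def do_it():
                return self._call_api(request)  # hypothetical API call

            response, cached = self._cache.get(cache_key, wrap_request_time(do_it))
        except RuntimeError as e:
            hexception(e)  # new in 0.5.9: log the error before returning a failure result
            return RequestResult(success=False, cached=False, error=str(e), completions=[], embedding=[])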

helm/clients/megatron_client.py
@@ -4,6 +4,7 @@ from typing import Any, Dict, List
 import traceback
 from helm.common.cache import CacheConfig
 
+from helm.common.hierarchical_logger import hexception
 from helm.common.request import (
     wrap_request_time,
     EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
@@ -103,6 +104,7 @@ class MegatronClient(CachingClient):
         try:
             return self._make_request(request)
         except Exception as e:
+            hexception(e)
             return RequestResult(
                 success=False,
                 cached=False,

helm/clients/mistral_client.py
@@ -1,6 +1,7 @@
 import requests
 from typing import Any, Dict, List, Optional, TypedDict, Union
 
+from helm.common.hierarchical_logger import hexception
 from helm.proxy.retry import NonRetriableException
 from helm.common.cache import CacheConfig
 from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
@@ -156,6 +157,7 @@ class MistralAIClient(CachingClient):
 
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except (requests.exceptions.RequestException, AssertionError) as e:
+            hexception(e)
             error: str = f"MistralClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 

helm/clients/moderation_api_client.py
@@ -1,5 +1,6 @@
 from typing import Any, Dict
 
+from helm.common.hierarchical_logger import hexception
 from helm.common.request import wrap_request_time
 from helm.common.cache import Cache, CacheConfig
 from helm.common.moderations_api_request import (
@@ -64,6 +65,7 @@ class ModerationAPIClient:
 
             response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
         except openai.OpenAIError as e:
+            hexception(e)
             error: str = f"Moderation API error: {e}"
             return ModerationAPIRequestResult(
                 success=False, cached=False, error=error, flagged=None, flagged_results=None, scores=None

helm/clients/openai_client.py
@@ -10,7 +10,7 @@ from helm.common import multimodal_request_utils
 from helm.common.cache import CacheConfig
 from helm.common.media_object import TEXT_TYPE, MultimediaObject, MediaObject
 from helm.common.request import ErrorFlags, Thinking, wrap_request_time, Request, RequestResult, GeneratedOutput, Token
-from helm.common.hierarchical_logger import hlog, hwarn
+from helm.common.hierarchical_logger import hlog, hwarn, hexception
 from helm.common.object_spec import get_class_by_name
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.tokenization_request import (
@@ -33,9 +33,12 @@ class OpenAIClientUtils:
     @classmethod
     def is_reasoning_model(cls, model_engine: str) -> bool:
         # All OpenAI reasoning models start "o[somenumber]", so we regexp for that to future proof things
-        return bool(re.match(r"^o\d+", model_engine))
+        return bool(re.match(r"^o\d+", model_engine)) or bool(re.match(r"^gpt-5", model_engine))
 
     # Error OpenAI throws when the image in the prompt violates their content policy
+    HARMFUL_INFORMATION_ERROR: str = (
+        "Invalid prompt: we've limited access to this content for safety reasons. This type of information may be used to benefit or to harm people."  # noqa: E501
+    )
     INAPPROPRIATE_IMAGE_ERROR: str = "Your input image may contain content that is not allowed by our safety system"
     INAPPROPRIATE_PROMPT_ERROR: str = "Invalid prompt: your prompt was flagged"
     INAPPROPRIATE_PROMPT_AZURE_ERROR: str = (
@@ -44,12 +47,10 @@ class OpenAIClientUtils:
     INAPPROPRIATE_PROMPT_MICROSOFT_ERROR: str = (
         "The response was filtered due to the prompt triggering Microsoft's content management policy."
     )
-
-    # OpenAI server error
-    OPENAI_SERVER_ERROR: str = (
-        "The server had an error processing your request. Sorry about that! You can retry your request, "
-        "or contact us through our help center at help.openai.com if you keep seeing this error."
-    )
+    # Grok content safety guidelines error message
+    # TODO: Refactor so that this is owned by the Grok client instead.
+    SAFETY_GUIDELINES_GROK_ERROR: str = "Content violates safety guidelines."
+    USAGE_GUIDELINES_GROK_ERROR: str = "Content violates usage guidelines."
 
     # Set the finish reason to this if the prompt violates OpenAI's content policy
     CONTENT_POLICY_VIOLATED_FINISH_REASON: str = (
@@ -74,21 +75,14 @@ class OpenAIClientUtils:
                 completions=[empty_completion] * request.num_completions,
                 embedding=[],
             )
-        elif cls.OPENAI_SERVER_ERROR in str(e):
-            # Handle these errors by returning an empty completion to unblock
-            hwarn(f"OpenAI server error for request: {str(request)}")
-            empty_completion = GeneratedOutput(
-                text="",
-                logprob=0,
-                tokens=[],
-                finish_reason={"reason": cls.OPENAI_SERVER_ERROR},
-            )
+        elif cls.HARMFUL_INFORMATION_ERROR in str(e):
             return RequestResult(
-                success=True,
+                success=False,
                 cached=False,
-                request_time=0,
-                completions=[empty_completion] * request.num_completions,
+                error="Prompt blocked by OpenAI's safety filter",
+                completions=[],
                 embedding=[],
+                error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
             )
         elif cls.INAPPROPRIATE_PROMPT_AZURE_ERROR in str(e) or cls.INAPPROPRIATE_PROMPT_MICROSOFT_ERROR in str(e):
             return RequestResult(
@@ -99,7 +93,26 @@ class OpenAIClientUtils:
                 embedding=[],
                 error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
             )
+        elif cls.SAFETY_GUIDELINES_GROK_ERROR in str(e):
+            return RequestResult(
+                success=False,
+                cached=False,
+                error="Grok API error: Content violates safety guidelines",
+                completions=[],
+                embedding=[],
+                error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+            )
+        elif cls.USAGE_GUIDELINES_GROK_ERROR in str(e):
+            return RequestResult(
+                success=False,
+                cached=False,
+                error="Grok API error: Content violates usage guidelines",
+                completions=[],
+                embedding=[],
+                error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+            )
 
+        hexception(e)
         error: str = f"OpenAI error: {e}"
         return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
@@ -158,6 +171,7 @@ class OpenAIClient(CachingClient):
             cache_key = self._get_cache_key(raw_request, request)
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except openai.OpenAIError as e:
+            hexception(e)
             error: str = f"OpenAI error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
@@ -424,6 +438,7 @@ class OpenAIClient(CachingClient):
             cache_key = self._get_cache_key(raw_request, request)
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except openai.OpenAIError as e:
+            hexception(e)
             error: str = f"OpenAI error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
@@ -479,6 +494,7 @@ class OpenAIClient(CachingClient):
             cache_key = self._get_cache_key({"audio": audio_path, "model": model}, request)
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except openai.OpenAIError as e:
+            hexception(e)
             error: str = f"OpenAI error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
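
The widened is_reasoning_model check now treats the gpt-5 family as reasoning models alongside the o-series. A quick illustration of the regex above (model names are examples):

    import re

    def is_reasoning_model(model_engine: str) -> bool:
        return bool(re.match(r"^o\d+", model_engine)) or bool(re.match(r"^gpt-5", model_engine))

    assert is_reasoning_model("o3-mini")
    assert is_reasoning_model("gpt-5-mini")
    assert not is_reasoning_model("gpt-4o")  # starts with neither "o<digit>" nor "gpt-5"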

helm/clients/openai_responses_client.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Union
 
 from helm.clients.openai_client import OpenAIClientUtils
 from helm.common.cache import CacheConfig
+from helm.common.hierarchical_logger import hwarn
 from helm.common.media_object import TEXT_TYPE
 from helm.common.request import (
     Thinking,
@@ -60,7 +61,28 @@ class OpenAIResponseClient(CachingClient):
 
     def _make_raw_request(self, request: Request) -> dict[str, Any]:
         input: Union[str, List[Dict[str, Any]]]
-        if request.multimodal_prompt is not None:
+
+        if (
+            (request.prompt and request.messages)
+            or (request.prompt and request.multimodal_prompt)
+            or (request.messages and request.multimodal_prompt)
+        ):
+            raise ValueError(
+                f"More than one of `prompt`, `messages` and `multimodal_prompt` was set in request: {request}"
+            )
+
+        if request.messages is not None:
+            # Checks that all messages have a role and some content
+            for message in request.messages:
+                if not message.get("role") or not message.get("content"):
+                    raise ValueError("All messages must have a role and content")
+            # Checks that the last role is "user"
+            if request.messages[-1]["role"] != "user":
+                raise ValueError("Last message must have role 'user'")
+            if request.prompt != "":
+                hwarn("Since message is set, prompt will be ignored")
+            input = request.messages
+        elif request.multimodal_prompt is not None:
             content = []
             request.validate()
             for media_object in request.multimodal_prompt.media_objects:
@@ -101,6 +123,8 @@ class OpenAIResponseClient(CachingClient):
         # Plus other changes
         model_engine: str = request.model_engine
         if OpenAIClientUtils.is_reasoning_model(model_engine):
+            if "reasoning" not in raw_request:
+                raw_request["reasoning"] = {}
             raw_request["reasoning"]["summary"] = "detailed"
             # Avoid error:
             # "Error code: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is
@@ -150,9 +174,9 @@ class OpenAIResponseClient(CachingClient):
             ]  # one of "message" or "reasoning" from API observation, but can also include tool calls
 
             if output_type == "reasoning":
-                reasoning_output += "\n".join([raw_output["text"] for raw_output in output["summary"]])
+                reasoning_output += "\n\n".join([raw_output["text"] for raw_output in output["summary"]])
             elif output_type == "message":
-                text_output += "\n".join([raw_output["text"] for raw_output in output["content"]])
+                text_output += "\n\n".join([raw_output["text"] for raw_output in output["content"]])
             # (Other output types are ignored)
 
         completion = truncate_and_tokenize_response_text(
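
The new validation makes prompt, messages, and multimodal_prompt mutually exclusive on a Request and requires chat-style messages to end with a user turn. A hedged sketch of what _make_raw_request now accepts and rejects (the model name is illustrative):

    from helm.common.request import Request

    # Accepted: every message has a role and content, and the last role is "user".
    ok = Request(
        model="openai/gpt-5",  # illustrative
        messages=[
            {"role": "system", "content": "You are terse."},
            {"role": "user", "content": "Name one prime number."},
        ],
    )

    # Rejected: the conversation does not end with a "user" turn.
    bad = Request(
        model="openai/gpt-5",  # illustrative
        messages=[{"role": "assistant", "content": "Hello!"}],
    )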

helm/clients/openrouter_client.py (new file)
@@ -0,0 +1,31 @@
+import os
+from typing import Optional
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class OpenRouterClient(OpenAIClient):
+    def __init__(
+        self,
+        tokenizer_name: str,
+        tokenizer: Tokenizer,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+        model_name: Optional[str] = None,
+        output_processor: Optional[str] = None,
+    ):
+        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
+        self.base_url = "https://openrouter.ai/api/v1/"
+        super().__init__(
+            tokenizer,
+            tokenizer_name,
+            cache_config=cache_config,
+            output_processor=output_processor,
+            base_url=self.base_url,
+            api_key=self.api_key,
+        )
+        self.model_name = model_name
+
+    def _get_model_for_request(self, request):
+        return self.model_name or request.model
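
A sketch of constructing the new client directly, with values borrowed from the test file below; in normal use the client would instead be instantiated from the deployment configs in helm/config/model_deployments.yaml:

    from helm.common.cache import BlackHoleCacheConfig
    from helm.clients.openrouter_client import OpenRouterClient
    from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer

    tokenizer_name = "mistralai/Mistral-7B-v0.1"  # illustrative tokenizer
    client = OpenRouterClient(
        tokenizer_name=tokenizer_name,
        tokenizer=HuggingFaceTokenizer(cache_config=BlackHoleCacheConfig(), tokenizer_name=tokenizer_name),
        cache_config=BlackHoleCacheConfig(),
        api_key=None,  # falls back to the OPENROUTER_API_KEY environment variable
        model_name="mistralai/mistral-medium-3.1",  # optional pin; otherwise request.model is used
    )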

helm/clients/palmyra_client.py
@@ -5,7 +5,7 @@ from typing import Any, Dict, List
 
 from helm.clients.openai_client import OpenAIClient
 from helm.common.cache import CacheConfig
-from helm.common.hierarchical_logger import hwarn
+from helm.common.hierarchical_logger import hexception, hwarn
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token, ErrorFlags
 from helm.common.tokenization_request import (
     TokenizationRequest,
@@ -99,6 +99,7 @@ class PalmyraClient(CachingClient):
 
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except (requests.exceptions.RequestException, AssertionError) as e:
+            hexception(e)
             error: str = f"PalmyraClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 

helm/clients/reka_client.py
@@ -6,7 +6,7 @@ from helm.proxy.retry import NonRetriableException
 from helm.common.cache import CacheConfig
 from helm.common.media_object import TEXT_TYPE
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput
-from helm.common.hierarchical_logger import hwarn
+from helm.common.hierarchical_logger import hexception, hwarn
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.tokenizers.tokenizer import Tokenizer
 from helm.clients.client import CachingClient, truncate_and_tokenize_response_text
@@ -167,6 +167,7 @@ class RekaClient(CachingClient):
 
             response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
         except (requests.exceptions.RequestException, AssertionError) as e:
+            hexception(e)
             error: str = f"RekaClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 

helm/clients/stanfordhealthcare_azure_openai_client.py
@@ -39,7 +39,7 @@ class StanfordHealthCareAzureOpenAIClient(AzureOpenAIClient):
                 tokenizer=tokenizer,
                 tokenizer_name=tokenizer_name,
                 cache_config=cache_config,
-                api_key="unused",
+                api_key=api_key,
                 base_url=base_url,
                 azure_openai_deployment_name=openai_model_name,
                 api_version=api_version,
@@ -50,7 +50,7 @@
                 tokenizer=tokenizer,
                 tokenizer_name=tokenizer_name,
                 cache_config=cache_config,
-                api_key="unused",
+                api_key=api_key,
                 endpoint=endpoint,
                 azure_openai_deployment_name=openai_model_name,
                 api_version=api_version,

helm/clients/stanfordhealthcare_http_model_client.py
@@ -5,6 +5,7 @@ from dataclasses import asdict
 from typing import Any, Dict, List, Optional
 
 from helm.common.cache import CacheConfig
+from helm.common.hierarchical_logger import hexception
 from helm.common.request import (
     wrap_request_time,
     Request,
@@ -82,6 +83,7 @@ class StanfordHealthCareHTTPModelClient(CachingClient, ABC):
                 request_time=response["request_time"],
             )
         except requests.exceptions.RequestException as e:
+            hexception(e)
             return RequestResult(success=False, cached=False, error=f"Request error: {e}", completions=[], embedding=[])
 
     @abstractmethod

helm/clients/test_openrouter_client.py (new file)
@@ -0,0 +1,69 @@
+import os
+import pytest
+import tempfile
+
+from helm.common.cache import BlackHoleCacheConfig, SqliteCacheConfig
+from helm.common.request import Request
+from helm.clients.openrouter_client import OpenRouterClient
+
+from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
+
+
+class TestOpenRouterClient:
+    def setup_method(self, method):
+        cache_file = tempfile.NamedTemporaryFile(delete=False)
+        self.cache_path: str = cache_file.name
+        self.tokenizer_name = "mistralai/Mistral-7B-v0.1"
+        self.tokenizer = HuggingFaceTokenizer(
+            cache_config=BlackHoleCacheConfig(),
+            tokenizer_name=self.tokenizer_name,
+        )
+
+    def teardown_method(self, method):
+        os.remove(self.cache_path)
+
+    @pytest.mark.parametrize(
+        "model_name,test_input,expected_model",
+        [
+            (
+                "mistralai/mistral-medium-3.1",
+                Request(
+                    model="mistralai/mistral-medium-3.1",
+                    model_deployment="openrouter/mistral-medium-3.1",
+                ),
+                "mistralai/mistral-medium-3.1",
+            ),
+            (
+                None,
+                Request(model="openai/gpt-oss-20b:free", model_deployment="openrouter/gpt-oss-20b:free"),
+                "openai/gpt-oss-20b:free",
+            ),
+        ],
+    )
+    def test_get_model_for_request(self, model_name, test_input, expected_model):
+        client = OpenRouterClient(
+            tokenizer_name=self.tokenizer_name,
+            tokenizer=self.tokenizer,
+            cache_config=SqliteCacheConfig(self.cache_path),
+            model_name=model_name,
+            api_key="test_key",
+        )
+        assert client._get_model_for_request(test_input) == expected_model
+
+    def test_api_key_env_var(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "test_key")
+        client = OpenRouterClient(
+            tokenizer_name=self.tokenizer_name,
+            tokenizer=self.tokenizer,
+            cache_config=SqliteCacheConfig(self.cache_path),
+        )
+        assert client.api_key == "test_key"
+
+    def test_api_key_argument(self):
+        client = OpenRouterClient(
+            tokenizer_name=self.tokenizer_name,
+            tokenizer=self.tokenizer,
+            cache_config=BlackHoleCacheConfig(),
+            api_key="explicit_key",
+        )
+        assert client.api_key == "explicit_key"
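
The parametrized test confirms that an explicit model_name pin takes precedence over request.model and that the client falls back to request.model when no pin is given; the other two tests cover API-key resolution. They can be run in isolation with: pytest helm/clients/test_openrouter_client.py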

helm/clients/together_client.py
@@ -9,6 +9,7 @@ import requests
 from retrying import retry
 
 from helm.common.cache import CacheConfig
+from helm.common.hierarchical_logger import hexception
 from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
 from helm.common.object_spec import get_class_by_name
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -99,7 +100,7 @@ class JobNotFinishedError(TogetherClientError):
     pass
 
 
-def _parse_thinking(input: str) -> Tuple[str, str]:
+def _parse_thinking_deepseek_r1(input: str) -> Tuple[str, str]:
     """Return a tuple of thinking text and output text."""
     match = re.match(r"<think>\n(.*)\n</think>\n{0,2}(.*)", input, re.DOTALL)
     if match:
@@ -112,6 +113,44 @@ def _parse_thinking(input: str) -> Tuple[str, str]:
     return (input, "")
 
 
+def _parse_thinking_qwen3(input: str) -> Tuple[str, str]:
+    """Return a tuple of thinking text and output text."""
+    match = re.match(r"<think>\n(.*)\n</think>\n{0,2}(.*)", input, re.DOTALL)
+    if match:
+        return (match.group(1), match.group(2))
+
+    match = re.match(r"<think>\n?(.*)", input, re.DOTALL)
+    if match:
+        return (match.group(1), "")
+
+    return (input, "")
+
+
+def _parse_thinking_glm_4_5(input: str) -> Tuple[str, str]:
+    """Return a tuple of thinking text and output text."""
+    match = re.match(r"\n<think>(.*)</think>(.*)", input, re.DOTALL)
+    if match:
+        return (match.group(1), match.group(2))
+
+    match = re.match(r"\n<think>(.*)", input, re.DOTALL)
+    if match:
+        return (match.group(1), "")
+
+    return (input, "")
+
+
+def _parse_thinking(input: str, model_name: str) -> Tuple[str, str]:
+    # TODO: Come up with a more sustainable extensible way of doing this.
+    if "deepseek-r1" in model_name:
+        return _parse_thinking_deepseek_r1(input)
+    elif "qwen3" in model_name:
+        return _parse_thinking_qwen3(input)
+    elif "glm-4.5" in model_name:
+        return _parse_thinking_glm_4_5(input)
+    else:
+        raise Exception(f"No thinking parser available for model {model_name}")
+
+
 class TogetherClient(CachingClient):
     """
     Client for the models where we evaluate offline. Since the queries are handled offline, the `TogetherClient` just
@@ -235,6 +274,7 @@ class TogetherClient(CachingClient):
         try:
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it_sync))
         except Exception as error:
+            hexception(error)
             return RequestResult(
                 success=False,
                 cached=False,
@@ -346,9 +386,8 @@ class TogetherChatClient(CachingClient):
         self._client = Together(api_key=api_key)
         self._together_model = together_model
         self._disable_logprobs = bool(disable_logprobs)
-        # self.output_processor is actually a function, not a class
         self._parse_thinking = bool(parse_thinking)
-
+        # self.output_processor is actually a function, not a class
         self.output_processor: Optional[Callable[[str], str]] = (
             get_class_by_name(output_processor) if output_processor else None
         )
@@ -418,6 +457,7 @@ class TogetherChatClient(CachingClient):
             raw_response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
             response = ChatCompletionResponse.model_validate(raw_response)
         except Exception as error:
+            hexception(error)
             return RequestResult(
                 success=False,
                 cached=False,
@@ -444,15 +484,15 @@ class TogetherChatClient(CachingClient):
             if self.output_processor:
                 output_text = self.output_processor(output_text)
 
+            thinking: Optional[Thinking] = None
             if self._parse_thinking:
-                thinking_text, output_text = _parse_thinking(output_text)
-                generated_outputs.append(
-                    GeneratedOutput(
-                        text=output_text, logprob=logprob, tokens=tokens, thinking=Thinking(text=thinking_text)
-                    )
-                )
-            else:
-                generated_outputs.append(GeneratedOutput(text=output_text, logprob=logprob, tokens=tokens))
+                thinking_text, output_text = _parse_thinking(output_text, request.model)
+                thinking = Thinking(text=thinking_text)
+            elif hasattr(choice.message, "reasoning_content"):
+                thinking = Thinking(text=choice.message.reasoning_content)
+            generated_outputs.append(
+                GeneratedOutput(text=output_text, logprob=logprob, tokens=tokens, thinking=thinking)
+            )
         return RequestResult(
             success=True,
             cached=cached,
@@ -525,6 +565,7 @@ class TogetherCompletionClient(CachingClient):
             raw_response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
             response = CompletionResponse.model_validate(raw_response)
         except Exception as error:
+            hexception(error)
             return RequestResult(
                 success=False,
                 cached=False,
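
The thinking parsers dispatch on substrings of the model name, since DeepSeek-R1, Qwen3, and GLM-4.5 wrap their reasoning in slightly different <think> framings. A worked example of the behavior (illustrative strings; assumes the together dependencies are installed so the module imports cleanly):

    from helm.clients.together_client import _parse_thinking

    thinking, answer = _parse_thinking("<think>\nLet me check.\n</think>\n\n42", "deepseek-r1")
    assert (thinking, answer) == ("Let me check.", "42")

    # GLM-4.5 emits a leading newline before its <think> block:
    thinking, answer = _parse_thinking("\n<think>step 1</think>The answer is 7", "glm-4.5")
    assert (thinking, answer) == ("step 1", "The answer is 7")

    # Unrecognized model names raise, since no parser is registered for them.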

helm/clients/vertexai_client.py
@@ -4,6 +4,7 @@ from threading import Lock
 from typing import Any, Dict, Mapping, Optional, List, Union, cast
 
 from helm.common.cache import CacheConfig
+from helm.common.hierarchical_logger import hexception
 from helm.common.multimodal_request_utils import get_contents_as_bytes
 from helm.common.media_object import TEXT_TYPE
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -152,6 +153,7 @@ class VertexAITextClient(VertexAIClient):
 
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except (requests.exceptions.RequestException, AssertionError) as e:
+            hexception(e)
             error: str = f"VertexAITextClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
@@ -276,8 +278,14 @@ class VertexAIChatClient(VertexAIClient):
                 if not candidate.content:
                     raise VertexAIContentBlockedError(f"No content in candidate: {candidate}")
                 if not candidate.content.parts:
-                    raise VertexAIContentBlockedError(f"No content parts in candidate: {candidate}")
-                predictions.append({"text": candidate.content.text})
+                    if candidate.finish_reason == 2:  # MAX_TOKENS
+                        # This means that there is no text output because the maximum number of tokens were
+                        # reached during thinking.
+                        predictions.append({"text": ""})
+                    else:
+                        raise VertexAIContentBlockedError(f"No content parts in candidate: {candidate}")
+                else:
+                    predictions.append({"text": candidate.content.text})
                 # TODO: Extract more information from the response
             return {"predictions": predictions}
 
@@ -304,6 +312,7 @@
                 error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
             )
         except (requests.exceptions.RequestException, AssertionError) as e:
+            hexception(e)
             error: str = f"VertexAITextClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
@@ -434,6 +443,7 @@
             cache_key = self.make_cache_key_with_safety_settings_preset(raw_cache_key, request)
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except requests.exceptions.RequestException as e:
+            hexception(e)
             error: str = f"Gemini Vision error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
         except VertexAIContentBlockedError as e:
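
For context, finish_reason == 2 is FinishReason.MAX_TOKENS in the Vertex AI SDK's enum, so a candidate that spends its whole token budget on thinking now yields an empty completion instead of being misreported as blocked content.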

helm/clients/vision_language/huggingface_vision2seq_client.py
@@ -8,7 +8,7 @@ import torch
 
 from helm.common.cache import CacheConfig
 from helm.common.gpu_utils import get_torch_device_name, is_cuda_available
-from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.hierarchical_logger import hexception, hlog, htrack_block
 from helm.common.media_object import TEXT_TYPE
 from helm.common.request import Request, RequestResult, GeneratedOutput, Token
 from helm.common.request import wrap_request_time
@@ -125,6 +125,7 @@ class HuggingFaceVision2SeqClient(CachingClient):
             )
             result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except RuntimeError as model_error:
+            hexception(model_error)
             return RequestResult(success=False, cached=False, error=str(model_error), completions=[], embedding=[])
 
         for text in result["output"]:

helm/clients/vision_language/huggingface_vlm_client.py
@@ -5,6 +5,7 @@ from transformers import pipeline
 from transformers.pipelines import ImageToTextPipeline
 
 from helm.common.cache import CacheConfig
+from helm.common.hierarchical_logger import hexception
 from helm.common.images_utils import open_image
 from helm.common.media_object import TEXT_TYPE
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -93,6 +94,7 @@ class HuggingFaceVLMClient(CachingClient):
             )
             result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except RuntimeError as e:
+            hexception(e)
             return RequestResult(success=False, cached=False, error=str(e), completions=[], embedding=[])
 
         output: str = result["generated_text"]

helm/clients/vision_language/idefics_client.py
@@ -8,7 +8,7 @@ from transformers import IdeficsForVisionText2Text, AutoProcessor, IdeficsProces
 from helm.common.cache import CacheConfig
 from helm.common.images_utils import open_image
 from helm.common.gpu_utils import get_torch_device_name
-from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.hierarchical_logger import hexception, hlog, htrack_block
 from helm.common.media_object import TEXT_TYPE
 from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import Request, RequestResult, GeneratedOutput, Token
@@ -137,6 +137,7 @@ class IDEFICSClient(CachingClient):
             )
             result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except RuntimeError as model_error:
+            hexception(model_error)
             return RequestResult(success=False, cached=False, error=str(model_error), completions=[], embedding=[])
 
         for text in result["output"]:

helm/clients/vision_language/open_flamingo_client.py
@@ -5,7 +5,7 @@ import torch
 from huggingface_hub import hf_hub_download
 
 from helm.common.cache import CacheConfig
-from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.hierarchical_logger import hexception, hlog, htrack_block
 from helm.common.images_utils import open_image
 from helm.common.gpu_utils import get_torch_device_name
 from helm.common.media_object import TEXT_TYPE
@@ -131,6 +131,7 @@ class OpenFlamingoClient(CachingClient):
             )
             result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except RuntimeError as ex:
+            hexception(ex)
             return RequestResult(success=False, cached=False, error=str(ex), completions=[], embedding=[])
 
         completions: List[GeneratedOutput] = []