crfm-helm 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in its public registry, and is provided for informational purposes only.
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +13 -3
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +96 -63
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/live_qa_annotator.py +84 -0
- helm/benchmark/annotation/medication_qa_annotator.py +81 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +29 -71
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +16 -2
- helm/benchmark/run_expander.py +77 -0
- helm/benchmark/run_spec_factory.py +4 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
- helm/benchmark/run_specs/experimental_run_specs.py +33 -0
- helm/benchmark/run_specs/finance_run_specs.py +33 -0
- helm/benchmark/run_specs/vlm_run_specs.py +168 -45
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +0 -4
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +4 -2
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +6 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_finance.yaml +143 -0
- helm/benchmark/static/schema_image2structure.yaml +254 -111
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_tables.yaml +200 -0
- helm/benchmark/static/schema_thai.yaml +223 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +294 -293
- helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
- helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/clients/anthropic_client.py +43 -9
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +71 -12
- helm/clients/openai_client.py +9 -2
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +3 -3
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +129 -23
- helm/clients/vertexai_client.py +62 -18
- helm/clients/vision_language/huggingface_vlm_client.py +1 -0
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +84 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +19 -0
- helm/config/model_deployments.yaml +412 -18
- helm/config/model_metadata.yaml +447 -25
- helm/config/tokenizer_configs.yaml +93 -1
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/services/server_service.py +1 -1
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +44 -2
- helm/tokenizers/huggingface_tokenizer.py +36 -13
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
- helm/benchmark/static_build/assets/index-878a1094.css +0 -1
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
helm/clients/test_client.py
CHANGED
@@ -1,5 +1,5 @@
-from helm.common.
-from helm.tokenizers.
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
+from helm.tokenizers.auto_tokenizer import AutoTokenizer
 from .client import truncate_sequence, truncate_and_tokenize_response_text
 from typing import List
 from helm.common.request import Request, GeneratedOutput, Token
@@ -52,8 +52,8 @@ def test_truncate_sequence():
 
 
 def test_truncate_and_tokenize_response_text():
-    tokenizer = HuggingFaceTokenizer(BlackHoleCacheConfig())
     tokenizer_name = "huggingface/gpt2"
+    tokenizer = AutoTokenizer(credentials={}, cache_backend_config=BlackHoleCacheBackendConfig())
 
     # No truncation
     response = truncate_and_tokenize_response_text(
helm/clients/test_huggingface_client.py
CHANGED
@@ -3,12 +3,18 @@ import pytest
 from helm.common.cache import BlackHoleCacheConfig
 from helm.common.request import Request, RequestResult
 from helm.clients.huggingface_client import HuggingFaceClient
+from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
 
 
 class TestHuggingFaceClient:
     def test_gpt2(self):
+        tokenizer = HuggingFaceTokenizer(
+            BlackHoleCacheConfig(), "huggingface/gpt2", pretrained_model_name_or_path="openai/gpt2"
+        )
         client = HuggingFaceClient(
-            cache_config=BlackHoleCacheConfig(),
+            cache_config=BlackHoleCacheConfig(),
+            tokenizer=tokenizer,
+            pretrained_model_name_or_path="openai-community/gpt2",
         )
         prompt: str = "I am a computer scientist."
         result: RequestResult = client.make_request(
@@ -29,8 +35,13 @@ class TestHuggingFaceClient:
 
     @pytest.mark.skip(reason="GPT-J 6B is 22 GB and extremely slow without a GPU.")
     def test_gptj_6b(self):
+        tokenizer = HuggingFaceTokenizer(
+            BlackHoleCacheConfig(), "huggingface/gpt2", pretrained_model_name_or_path="openai/gpt2"
+        )
         client = HuggingFaceClient(
-            cache_config=BlackHoleCacheConfig(),
+            cache_config=BlackHoleCacheConfig(),
+            tokenizer=tokenizer,
+            pretrained_model_name_or_path="openai-community/gpt2",
         )
         result: RequestResult = client.make_request(
             Request(
@@ -45,8 +56,13 @@ class TestHuggingFaceClient:
         assert len(result.completions) == 3
 
     def test_logprob(self):
+        tokenizer = HuggingFaceTokenizer(
+            BlackHoleCacheConfig(), "huggingface/gpt2", pretrained_model_name_or_path="openai/gpt2"
+        )
         client = HuggingFaceClient(
-            cache_config=BlackHoleCacheConfig(),
+            cache_config=BlackHoleCacheConfig(),
+            tokenizer=tokenizer,
+            pretrained_model_name_or_path="openai-community/gpt2",
         )
         prompt: str = "I am a computer scientist."
         result: RequestResult = client.make_request(
helm/clients/test_together_client.py
CHANGED
@@ -2,10 +2,10 @@ import os
 import pytest
 import tempfile
 
-from helm.common.cache import SqliteCacheConfig
+from helm.common.cache import BlackHoleCacheConfig, SqliteCacheConfig
 from helm.common.request import Request
 
-from .together_client import TogetherClient, TogetherClientError
+from .together_client import TogetherClient, TogetherChatClient, TogetherCompletionClient, TogetherClientError
 
 
 class TestTogetherClient:
@@ -107,3 +107,73 @@ class TestTogetherClient:
                 model_deployment="together/redpajama-incite-base-3b-v1",
             )
         )
+
+
+@pytest.mark.models
+def test_together_chat_client_make_request():
+    # Requires setting TOGETHER_API_KEY environment variable.
+    client = TogetherChatClient(
+        cache_config=BlackHoleCacheConfig(), api_key=None, together_model="meta-llama/Llama-3-8b-chat-hf"
+    )
+    request = Request(
+        model="meta/llama-3-8b-instruct",
+        model_deployment="together/llama-3-8b-instruct",
+        prompt="Elephants are one of the most",
+        temperature=0.0,
+        max_tokens=10,
+    )
+    result = client.make_request(request)
+    assert result.success
+    assert not result.cached
+    assert result.embedding == []
+    assert len(result.completions) == 1
+    assert result.completions[0].text == "...intelligent animals on Earth!assistant"
+    assert result.completions[0].logprob == 0.0
+    result_token_strings = [token.text for token in result.completions[0].tokens]
+    assert result_token_strings == [
+        "...",
+        "int",
+        "elligent",
+        " animals",
+        " on",
+        " Earth",
+        "!",
+        "<|eot_id|>",
+        "<|start_header_id|>",
+        "assistant",
+    ]
+
+
+@pytest.mark.models
+def test_together_completion_client_make_request():
+    # Requires setting TOGETHER_API_KEY environment variable.
+    client = TogetherCompletionClient(
+        cache_config=BlackHoleCacheConfig(), api_key=None, together_model="meta-llama/Llama-3-8b-hf"
+    )
+    request = Request(
+        model="meta/llama-3-8b",
+        model_deployment="together/llama-3-8b",
+        prompt="Elephants are one of the most",
+        temperature=0.0,
+        max_tokens=10,
+    )
+    result = client.make_request(request)
+    assert result.success
+    assert not result.cached
+    assert result.embedding == []
+    assert len(result.completions) == 1
+    assert result.completions[0].text == " popular animals in the world. They are known for"
+    assert result.completions[0].logprob == 0.0
+    result_token_strings = [token.text for token in result.completions[0].tokens]
+    assert result_token_strings == [
+        " popular",
+        " animals",
+        " in",
+        " the",
+        " world",
+        ".",
+        " They",
+        " are",
+        " known",
+        " for",
+    ]
helm/clients/together_client.py
CHANGED
@@ -1,6 +1,7 @@
 from copy import deepcopy
 from itertools import zip_longest
-
+import threading
+from typing import List, Dict, Any, Mapping, Optional, TypedDict, Union
 
 import requests
 from retrying import retry
@@ -12,7 +13,7 @@ from helm.clients.client import CachingClient, truncate_sequence, cleanup_str
 
 try:
     from together import Together
-    from together.types import ChatCompletionResponse
+    from together.types import ChatCompletionResponse, CompletionResponse
 except ModuleNotFoundError as e:
     handle_module_not_found_error(e, ["together"])
 
@@ -282,6 +283,24 @@ class TogetherClient(CachingClient):
         )
 
 
+_MODEL_TO_DEFAULT_STOP_TOKENS: Optional[Mapping[str, List[str]]] = None
+_MODEL_TO_DEFAULT_STOP_TOKENS_LOCK = threading.Lock()
+
+
+def get_default_stop_tokens_for_model(together_model: str, together_client: Together) -> List[str]:
+    global _MODEL_TO_DEFAULT_STOP_TOKENS
+    global _MODEL_TO_DEFAULT_STOP_TOKENS_LOCK
+    with _MODEL_TO_DEFAULT_STOP_TOKENS_LOCK:
+        if _MODEL_TO_DEFAULT_STOP_TOKENS is None:
+            _MODEL_TO_DEFAULT_STOP_TOKENS = {}
+            for model in together_client.models.list():
+                _MODEL_TO_DEFAULT_STOP_TOKENS[model.id.lower()] = model.config["stop"]
+    stop_tokens = _MODEL_TO_DEFAULT_STOP_TOKENS.get(together_model.lower())
+    if stop_tokens is None:
+        raise ValueError(f"Unknown together_model {together_model}")
+    return stop_tokens
+
+
 class TogetherRawChatRequest(TypedDict):
     messages: List[Dict[str, str]]
     model: str
@@ -295,34 +314,38 @@ class TogetherRawChatRequest(TypedDict):
     n: int
 
 
-def convert_to_raw_chat_request(request: Request) -> TogetherRawChatRequest:
-    if request.messages:
-        messages = request.messages
-    else:
-        messages = [{"role": "user", "content": request.prompt}]
-    return {
-        "messages": messages,
-        "model": request.model,
-        "max_tokens": request.max_tokens,
-        "stop": request.stop_sequences,
-        "temperature": request.temperature,
-        "top_p": request.top_p,
-        "top_k": request.top_k_per_token,
-        "logprobs": min(request.top_k_per_token, 1),
-        "echo": request.echo_prompt,
-        "n": request.num_completions,
-    }
-
-
 class TogetherChatClient(CachingClient):
     """Client that uses the Python Together library for chat models."""
 
-    def __init__(self, cache_config: CacheConfig, api_key: str, together_model: Optional[str] = None):
+    def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
         super().__init__(cache_config=cache_config)
         self._client = Together(api_key=api_key)
+        self._together_model = together_model
+
+    def convert_to_raw_chat_request(self, request: Request) -> TogetherRawChatRequest:
+        if request.messages:
+            messages = request.messages
+        else:
+            messages = [{"role": "user", "content": request.prompt}]
+        if self._together_model is not None:
+            model = self._together_model
+        else:
+            model = request.model
+        return {
+            "messages": messages,
+            "model": model,
+            "max_tokens": request.max_tokens,
+            "stop": request.stop_sequences + get_default_stop_tokens_for_model(model, self._client),
+            "temperature": request.temperature,
+            "top_p": request.top_p,
+            "top_k": request.top_k_per_token,
+            "logprobs": min(request.top_k_per_token, 1),
+            "echo": request.echo_prompt,
+            "n": request.num_completions,
+        }
 
     def make_request(self, request: Request) -> RequestResult:
-        raw_request = convert_to_raw_chat_request(request)
+        raw_request = self.convert_to_raw_chat_request(request)
         cache_key = CachingClient.make_cache_key(raw_request, request)
 
         def do_it() -> Dict[Any, Any]:
@@ -363,3 +386,86 @@ class TogetherChatClient(CachingClient):
             completions=generated_outputs,
             embedding=[],
         )
+
+
+class TogetherRawCompletionRequest(TypedDict):
+    prompt: str
+    model: str
+    max_tokens: int
+    stop: List[str]
+    temperature: float
+    top_p: float
+    top_k: int
+    logprobs: int
+    echo: bool
+    n: int
+
+
+class TogetherCompletionClient(CachingClient):
+    """Client that uses the Python Together library for text completion models."""
+
+    def __init__(self, cache_config: CacheConfig, api_key: Optional[str], together_model: Optional[str] = None):
+        super().__init__(cache_config=cache_config)
+        self._client = Together(api_key=api_key)
+        self._together_model = together_model
+
+    def convert_to_raw_completion_request(self, request: Request) -> TogetherRawCompletionRequest:
+        if self._together_model is not None:
+            model = self._together_model
+        else:
+            model = request.model
+        return {
+            "prompt": request.prompt,
+            "model": model,
+            "max_tokens": request.max_tokens,
+            "stop": request.stop_sequences + get_default_stop_tokens_for_model(model, self._client),
+            "temperature": request.temperature,
+            "top_p": request.top_p,
+            "top_k": request.top_k_per_token,
+            "logprobs": min(request.top_k_per_token, 1),
+            "echo": request.echo_prompt,
+            "n": request.num_completions,
+        }
+
+    def make_request(self, request: Request) -> RequestResult:
+        raw_request = self.convert_to_raw_completion_request(request)
+        cache_key = CachingClient.make_cache_key(raw_request, request)
+
+        def do_it() -> Dict[Any, Any]:
+            response = self._client.completions.create(**raw_request)
+            return response.model_dump(mode="json")
+
+        try:
+            raw_response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+            response = CompletionResponse.model_validate(raw_response)
+        except Exception as error:
+            return RequestResult(
+                success=False,
+                cached=False,
+                error=str(error),
+                completions=[],
+                embedding=[],
+            )
+
+        generated_outputs: List[GeneratedOutput] = []
+        for choice in response.choices:
+            # NOTE: Together always returns None for choice.finish_reason
+            # NOTE: Together does not return logprobs for the whole generated output, only for individual tokens
+            tokens: List[Token] = []
+            if choice.logprobs:
+                for token_text, token_logprob in zip_longest(
+                    choice.logprobs.tokens or [], choice.logprobs.token_logprobs or []
+                ):
+                    if token_text is None:
+                        break
+                    tokens.append(Token(text=token_text, logprob=token_logprob or 0.0))
+            assert choice.text
+            generated_outputs.append(GeneratedOutput(text=choice.text, logprob=0.0, tokens=tokens))
+        return RequestResult(
+            success=True,
+            cached=cached,
+            request_time=raw_response["request_time"],
+            request_datetime=raw_response["request_datetime"],
+            completions=generated_outputs,
+            embedding=[],
+        )
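For orientation, the following is a minimal, hypothetical usage sketch of the new TogetherCompletionClient, modeled directly on test_together_completion_client_make_request above; the model, deployment, and prompt values are copied from that test, and a TOGETHER_API_KEY environment variable is assumed to be set.

# Hypothetical sketch mirroring the new test above; not part of the diff.
from helm.common.cache import BlackHoleCacheConfig
from helm.common.request import Request
from helm.clients.together_client import TogetherCompletionClient

client = TogetherCompletionClient(
    cache_config=BlackHoleCacheConfig(),
    api_key=None,  # per the test comment, the key comes from the TOGETHER_API_KEY environment variable
    together_model="meta-llama/Llama-3-8b-hf",
)
result = client.make_request(
    Request(
        model="meta/llama-3-8b",
        model_deployment="together/llama-3-8b",
        prompt="Elephants are one of the most",
        temperature=0.0,
        max_tokens=10,
    )
)
print(result.completions[0].text)  # e.g. " popular animals in the world. They are known for"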
helm/clients/vertexai_client.py
CHANGED
@@ -1,7 +1,7 @@
 import requests
 from abc import ABC, abstractmethod
 from threading import Lock
-from typing import Any, Dict, Optional, List, Union
+from typing import Any, Dict, Mapping, Optional, List, Union
 
 from helm.common.cache import CacheConfig
 from helm.common.media_object import TEXT_TYPE
@@ -26,22 +26,62 @@ class VertexAIContentBlockedError(Exception):
     pass
 
 
+class SafetySettingPresets:
+    BLOCK_NONE = "block_none"  # Disable all blocking
+    DEFAULT = "default"  # Use default safety settings
+
+
+def _get_safety_settings_for_preset(
+    safety_settings_preset: Optional[str],
+) -> Optional[Dict[HarmCategory, SafetySetting.HarmBlockThreshold]]:
+    """Get the safety settings for the safety_settings_preset.
+
+    If safety_settings_preset is None, use the default value of BLOCK_NONE (*not* DEFAULT)."""
+    if safety_settings_preset is None or safety_settings_preset == SafetySettingPresets.BLOCK_NONE:
+        return {
+            harm_category: SafetySetting.HarmBlockThreshold(SafetySetting.HarmBlockThreshold.BLOCK_NONE)
+            for harm_category in iter(HarmCategory)
+        }
+    elif safety_settings_preset == SafetySettingPresets.DEFAULT:
+        return None
+    else:
+        raise ValueError(f"Unknown safety_settings_preset: {safety_settings_preset}")
+
+
+def _get_model_name_for_request(request: Request) -> str:
+    # We have to strip "-safety-" suffixes from model names because they are not part of the Vertex AI model name
+    # TODO: Clean up this hack
+    return request.model_engine.split("-safety-")[0]
+
+
 class VertexAIClient(CachingClient, ABC):
     """Client for Vertex AI models"""
 
-    def __init__(
+    def __init__(
+        self, cache_config: CacheConfig, project_id: str, location: str, safety_settings_preset: Optional[str] = None
+    ) -> None:
         super().__init__(cache_config=cache_config)
         self.project_id = project_id
         self.location = location
 
-
-        self.safety_settings
-            harm_category: SafetySetting.HarmBlockThreshold(SafetySetting.HarmBlockThreshold.BLOCK_NONE)
-            for harm_category in iter(HarmCategory)
-        }
+        self.safety_settings_preset = safety_settings_preset
+        self.safety_settings = _get_safety_settings_for_preset(safety_settings_preset)
 
         vertexai.init(project=self.project_id, location=self.location)
 
+    def make_cache_key_with_safety_settings_preset(self, raw_request: Mapping, request: Request) -> Mapping:
+        """Construct the key for the cache using the raw request.
+
+        Add `self.safety_settings_preset` to the key, if not None."""
+        if self.safety_settings_preset is not None:
+            assert "safety_settings_preset" not in raw_request
+            return {
+                **CachingClient.make_cache_key(raw_request, request),
+                "safety_settings_preset": self.safety_settings_preset,
+            }
+        else:
+            return CachingClient.make_cache_key(raw_request, request)
+
     @abstractmethod
     def make_request(self, request: Request) -> RequestResult:
         raise NotImplementedError
@@ -71,7 +111,7 @@ class VertexAITextClient(VertexAIClient):
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = request
+        model_name: str = _get_model_name_for_request(request)
 
         try:
 
@@ -87,9 +127,9 @@ class VertexAITextClient(VertexAIClient):
            # We need to include the engine's name to differentiate among requests made for different model
            # engines since the engine name is not included in the request itself.
            # Same for the prompt.
-            cache_key =
+            cache_key = self.make_cache_key_with_safety_settings_preset(
                {
-                    "engine":
+                    "engine": model_name,
                     "prompt": request.prompt,
                     **parameters,
                 },
@@ -177,7 +217,7 @@ class VertexAIChatClient(VertexAIClient):
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = request
+        model_name: str = _get_model_name_for_request(request)
         model = self.get_model(model_name)
 
         try:
@@ -197,7 +237,7 @@ class VertexAIChatClient(VertexAIClient):
 
            # Depending on the version of the Vertex AI library and the type of prompt blocking,
            # prompt blocking can show up in many ways, so this defensively handles most of these ways
-            if response.prompt_feedback.block_reason:
+            if response.prompt_feedback and response.prompt_feedback.block_reason:
                 raise VertexAIContentBlockedError(
                     f"Prompt blocked with reason: {response.prompt_feedback.block_reason}"
                 )
@@ -209,8 +249,10 @@ class VertexAIChatClient(VertexAIClient):
                # content blocking can show up in many ways, so this defensively handles most of these ways
                 if candidate.finish_reason in VertexAIChatClient.CONTENT_BLOCKED_FINISH_REASONS:
                     raise VertexAIContentBlockedError(f"Content blocked with reason: {candidate.finish_reason}")
+                if not candidate.content:
+                    raise VertexAIContentBlockedError(f"No content in candidate: {candidate}")
                 if not candidate.content.parts:
-                    raise VertexAIContentBlockedError(f"No parts in candidate: {candidate}")
+                    raise VertexAIContentBlockedError(f"No content parts in candidate: {candidate}")
                 predictions.append({"text": candidate.content.text})
                # TODO: Extract more information from the response
             return {"predictions": predictions}
@@ -218,7 +260,7 @@ class VertexAIChatClient(VertexAIClient):
            # We need to include the engine's name to differentiate among requests made for different model
            # engines since the engine name is not included in the request itself.
            # Same for the prompt.
-            cache_key =
+            cache_key = self.make_cache_key_with_safety_settings_preset(
                {
                     "model_name": model_name,
                     "prompt": request.prompt,
@@ -313,7 +355,7 @@ class VertexAIChatClient(VertexAIClient):
         }
 
         completions: List[GeneratedOutput] = []
-        model_name: str = request
+        model_name: str = _get_model_name_for_request(request)
         model = self.get_model(model_name)
 
         request_time = 0
@@ -330,7 +372,7 @@ class VertexAIChatClient(VertexAIClient):
             )
            # Depending on the version of the Vertex AI library and the type of prompt blocking,
            # prompt blocking can show up in many ways, so this defensively handles most of these ways
-            if response.prompt_feedback.block_reason:
+            if response.prompt_feedback and response.prompt_feedback.block_reason:
                 raise VertexAIContentBlockedError(
                     f"Prompt blocked with reason: {response.prompt_feedback.block_reason}"
                 )
@@ -345,15 +387,17 @@ class VertexAIChatClient(VertexAIClient):
                # content blocking can show up in many ways, so this defensively handles most of these ways
                 if candidate.finish_reason in VertexAIChatClient.CONTENT_BLOCKED_FINISH_REASONS:
                     raise VertexAIContentBlockedError(f"Content blocked with reason: {candidate.finish_reason}")
+                if not candidate.content:
+                    raise VertexAIContentBlockedError(f"No content in candidate: {candidate}")
                 if not candidate.content.parts:
-                    raise VertexAIContentBlockedError(f"No parts in candidate: {candidate}")
+                    raise VertexAIContentBlockedError(f"No content parts in candidate: {candidate}")
                 return {"predictions": [{"text": candidate.text}]}
 
             raw_cache_key = {"model_name": model_name, "prompt": prompt_key, **parameters}
             if completion_index > 0:
                 raw_cache_key["completion_index"] = completion_index
 
-            cache_key =
+            cache_key = self.make_cache_key_with_safety_settings_preset(raw_cache_key, request)
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except requests.exceptions.RequestException as e:
             error: str = f"Gemini Vision error: {e}"
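As a rough illustration of the new safety-settings plumbing shown above, the sketch below exercises the added helpers; it is an assumption-laden example (it presumes the Vertex AI optional dependencies are installed) and is not part of the diff.

# Sketch only: behavior of the preset helpers added in 0.5.2.
from helm.clients.vertexai_client import SafetySettingPresets, _get_safety_settings_for_preset

# BLOCK_NONE, which is also used when the preset is None, maps every HarmCategory
# to the BLOCK_NONE threshold, i.e. blocking is disabled.
assert _get_safety_settings_for_preset(None) == _get_safety_settings_for_preset(SafetySettingPresets.BLOCK_NONE)

# DEFAULT returns None, leaving Vertex AI's own default safety settings in effect.
assert _get_safety_settings_for_preset(SafetySettingPresets.DEFAULT) is None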
helm/clients/vision_language/huggingface_vlm_client.py
CHANGED
@@ -38,6 +38,7 @@ class HuggingFaceVLMClient(CachingClient):
         "huggingface/llava-v1.6-vicuna-13b-hf": "llava-hf/llava-v1.6-vicuna-13b-hf",
         "huggingface/llava-v1.6-mistral-7b-hf": "llava-hf/llava-v1.6-mistral-7b-hf",
         "huggingface/llava-v1.6-34b-hf": "llava-hf/llava-v1.6-34b-hf",
+        "huggingface/prometheus-vision-13b-v1.0-hf": "PahaII/prometheus-vision-13b-v1.0-hf",
     }
 
     def __init__(self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig):
helm/clients/vision_language/paligemma_client.py
ADDED
@@ -0,0 +1,146 @@
+from threading import Lock
+from typing import Any, Dict, List, Optional
+
+import torch
+from dataclasses import dataclass
+from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
+
+from helm.common.cache import CacheConfig
+from helm.common.images_utils import open_image
+from helm.common.gpu_utils import get_torch_device_name
+from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.media_object import TEXT_TYPE
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.common.request import Request, RequestResult, GeneratedOutput, Token
+from helm.common.tokenization_request import TokenizationRequest
+from helm.common.request import wrap_request_time
+from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt
+from helm.tokenizers.tokenizer import Tokenizer
+
+try:
+    from PIL import Image
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["images"])
+
+# Added to solve: cutlassF: no kernel found to launch!
+torch.backends.cuda.enable_mem_efficient_sdp(False)
+torch.backends.cuda.enable_flash_sdp(False)
+
+
+@dataclass(frozen=True)
+class LoadedPaliGemmaForConditionalGeneration:
+    """Loaded model and processor for PaliGemma."""
+
+    model: PaliGemmaForConditionalGeneration
+    processor: AutoProcessor
+
+
+_models_lock: Lock = Lock()
+_models: Dict[str, Optional[LoadedPaliGemmaForConditionalGeneration]] = {}
+
+
+class PaliGemmaClient(CachingClient):
+    """
+    PaliGemma is a versatile and lightweight vision-language model (VLM) inspired by PaLI-3
+    and based on open components such as the SigLIP vision model and the Gemma language model.
+    It takes both image and text as input and generates text as output, supporting multiple languages.
+    It is designed for class-leading fine-tune performance on a wide range of vision-language tasks
+    such as image and short video caption, visual question answering, text reading, object detection
+    and object segmentation.
+    """
+
+    def __init__(self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig):
+        super().__init__(cache_config=cache_config)
+        self.tokenizer = tokenizer
+        self.tokenizer_name = tokenizer_name
+        self._device: str = get_torch_device_name()
+
+    def _get_model(self, checkpoint: str) -> LoadedPaliGemmaForConditionalGeneration:
+        global _models_lock
+        global _models
+
+        # Ensure that only one thread is loading the model at a time
+        with _models_lock:
+            if checkpoint not in _models or _models[checkpoint] is None:
+                hlog(f"Loading model {checkpoint} and caching in memory...")
+                model = PaliGemmaForConditionalGeneration.from_pretrained(
+                    checkpoint, torch_dtype=torch.bfloat16, device_map="auto"
+                ).eval()
+                processor = AutoProcessor.from_pretrained(checkpoint)
+                _models[checkpoint] = LoadedPaliGemmaForConditionalGeneration(model, processor)
+            loaded_model_processor = _models[checkpoint]
+
+        assert loaded_model_processor is not None
+        return loaded_model_processor
+
+    def make_request(self, request: Request) -> RequestResult:
+        assert request.multimodal_prompt is not None, "Multimodal prompt is required"
+
+        loaded_model_processor: LoadedPaliGemmaForConditionalGeneration = self._get_model(request.model_deployment)
+        model = loaded_model_processor.model
+        processor = loaded_model_processor.processor
+        generation_args = {"max_new_tokens": request.max_tokens}
+
+        images: List[Image.Image] = []
+        prompt_pieces: List[str] = []
+        for media_object in request.multimodal_prompt.media_objects:
+            if media_object.is_type("image") and media_object.location:
+                images += [open_image(media_object.location).convert("RGB")]
+            elif media_object.is_type(TEXT_TYPE):
+                if media_object.text is None:
+                    raise ValueError("MediaObject of text type has missing text field value")
+                prompt_pieces.append(media_object.text)
+            else:
+                raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+        prompt_text: str = "\n".join(prompt_pieces)
+        model_inputs = processor(text=prompt_text, images=images, return_tensors="pt").to(self._device)
+        input_len = model_inputs["input_ids"].shape[-1]
+
+        completions: List[GeneratedOutput] = []
+        with htrack_block(f"Generating for prompt: {prompt_text}"):
+            try:
+                concat_results = []
+                for i_completion in range(request.num_completions):
+
+                    def do_it() -> Dict[str, Any]:
+                        with torch.inference_mode():
+                            generation = model.generate(
+                                **model_inputs, max_new_tokens=request.max_tokens, do_sample=False
+                            )[0]
+                            if not request.echo_prompt:
+                                generation = generation[input_len:]
+                            decoded = processor.decode(generation, skip_special_tokens=True)
+                            return {"output": decoded}
+
+                    # Include the prompt and model name in the cache key
+                    cache_key = CachingClient.make_cache_key(
+                        raw_request={
+                            "n": request.num_completions,
+                            "i": i_completion,
+                            "model": request.model,
+                            "prompt": generate_uid_for_multimodal_prompt(request.multimodal_prompt),
+                            **generation_args,
+                        },
+                        request=request,
+                    )
+                    result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+                    concat_results.append(result)
+            except RuntimeError as model_error:
+                return RequestResult(success=False, cached=False, error=str(model_error), completions=[], embedding=[])
+
+            for result in concat_results:
+                text = result["output"]
+                hlog(f"Generated text: {text}")
+                tokenization_result = self.tokenizer.tokenize(
+                    TokenizationRequest(text, tokenizer=self.tokenizer_name, encode=False)
+                )
+                tokens: List[Token] = [Token(text=str(text), logprob=0) for text in tokenization_result.raw_tokens]
+                completions.append(GeneratedOutput(text=text, logprob=0, tokens=tokens))
+
+            return RequestResult(
+                success=True,
+                cached=cached,
+                request_time=result["request_time"],
+                completions=completions,
+                embedding=[],
+            )
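To make the call flow of the new PaliGemmaClient concrete, a hypothetical invocation might look like the sketch below; the checkpoint name, tokenizer wiring, image path, and prompt are assumptions rather than values from the diff, and running it requires a GPU plus the corresponding transformers weights.

# Hypothetical sketch only; names below are assumptions, not part of the diff.
from helm.common.cache import BlackHoleCacheConfig
from helm.common.media_object import MediaObject, MultimediaObject
from helm.common.request import Request
from helm.clients.vision_language.paligemma_client import PaliGemmaClient
from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer

checkpoint = "google/paligemma-3b-mix-224"  # assumed checkpoint; model_deployment is passed to from_pretrained
tokenizer = HuggingFaceTokenizer(BlackHoleCacheConfig(), checkpoint, pretrained_model_name_or_path=checkpoint)
client = PaliGemmaClient(tokenizer=tokenizer, tokenizer_name=checkpoint, cache_config=BlackHoleCacheConfig())

request = Request(
    model=checkpoint,
    model_deployment=checkpoint,
    multimodal_prompt=MultimediaObject(
        media_objects=[
            MediaObject(location="cat.png", content_type="image/png"),  # local image path (assumption)
            MediaObject(text="caption en", content_type="text/plain"),
        ]
    ),
    max_tokens=20,
)
result = client.make_request(request)
print(result.completions[0].text)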