crfm-helm 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crfm-helm might be problematic.
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +13 -3
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +96 -63
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/live_qa_annotator.py +84 -0
- helm/benchmark/annotation/medication_qa_annotator.py +81 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +29 -71
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +16 -2
- helm/benchmark/run_expander.py +77 -0
- helm/benchmark/run_spec_factory.py +4 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
- helm/benchmark/run_specs/experimental_run_specs.py +33 -0
- helm/benchmark/run_specs/finance_run_specs.py +33 -0
- helm/benchmark/run_specs/vlm_run_specs.py +168 -45
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +0 -4
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +4 -2
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +6 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_finance.yaml +143 -0
- helm/benchmark/static/schema_image2structure.yaml +254 -111
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_tables.yaml +200 -0
- helm/benchmark/static/schema_thai.yaml +223 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +294 -293
- helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
- helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/clients/anthropic_client.py +43 -9
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +71 -12
- helm/clients/openai_client.py +9 -2
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +3 -3
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +129 -23
- helm/clients/vertexai_client.py +62 -18
- helm/clients/vision_language/huggingface_vlm_client.py +1 -0
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +84 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +19 -0
- helm/config/model_deployments.yaml +412 -18
- helm/config/model_metadata.yaml +447 -25
- helm/config/tokenizer_configs.yaml +93 -1
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/services/server_service.py +1 -1
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +44 -2
- helm/tokenizers/huggingface_tokenizer.py +36 -13
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
- helm/benchmark/static_build/assets/index-878a1094.css +0 -1
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
Binary files changed (image assets; contents not shown)

helm/benchmark/static_build/index.html
CHANGED

@@ -7,11 +7,11 @@
     <title>Holistic Evaluation of Language Models (HELM)</title>
     <meta name="description" content="The Holistic Evaluation of Language Models (HELM) serves as a living benchmark for transparency in language models. Providing broad coverage and recognizing incompleteness, multi-metric measurements, and standardization. All data and analysis are freely accessible on the website for exploration and study." />
     <script type="text/javascript" src="./config.js"></script>
-    <script type="module" crossorigin src="./assets/index-737eef9e.js"></script>
+    <script type="module" crossorigin src="./assets/index-30dbceba.js"></script>
     <link rel="modulepreload" crossorigin href="./assets/react-d4a0b69b.js">
     <link rel="modulepreload" crossorigin href="./assets/recharts-6d337683.js">
     <link rel="modulepreload" crossorigin href="./assets/tremor-54a99cc4.js">
-    <link rel="stylesheet" href="./assets/index-878a1094.css">
+    <link rel="stylesheet" href="./assets/index-66b02d40.css">
   </head>
   <body class="block">
     <div id="root"></div>
helm/clients/anthropic_client.py
CHANGED

@@ -1,5 +1,6 @@
 from typing import Any, Dict, List, Optional, TypedDict, Union, cast
 import json
+import os
 import requests
 import tempfile
 import time
@@ -244,6 +245,8 @@ class AnthropicMessagesClient(CachingClient):
     # Source: https://docs.anthropic.com/claude/docs/models-overview
     MAX_OUTPUT_TOKENS: int = 4096

+    MAX_IMAGE_SIZE_BYTES: int = 5242880  # 5MB
+
     def __init__(
         self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig, api_key: Optional[str] = None
     ):
@@ -286,7 +289,12 @@ class AnthropicMessagesClient(CachingClient):
                     if not media_object.location:
                         raise Exception("MediaObject of image type has missing location field value")

-                    from helm.common.images_utils import
+                    from helm.common.images_utils import (
+                        encode_base64,
+                        get_dimensions,
+                        copy_image,
+                        resize_image_to_max_file_size,
+                    )

                     image_location: str = media_object.location
                     base64_image: str
@@ -310,6 +318,21 @@ class AnthropicMessagesClient(CachingClient):
                             height=min(image_height, AnthropicClient.MAX_IMAGE_DIMENSION),
                         )
                         base64_image = encode_base64(temp_file.name, format="JPEG")
+
+                    elif os.path.getsize(image_location) > AnthropicMessagesClient.MAX_IMAGE_SIZE_BYTES:
+                        hlog(
+                            f"WARNING: Image {image_location} exceeds max allowed size: "
+                            f"{AnthropicMessagesClient.MAX_IMAGE_SIZE_BYTES} bytes"
+                        )
+                        # Resize the image so it is smaller than the max allowed size
+                        with tempfile.NamedTemporaryFile(suffix=".jpg") as temp_file:
+                            hlog(f"Resizing image to temporary path: {temp_file.name}")
+                            resize_image_to_max_file_size(
+                                src=image_location,
+                                dest=temp_file.name,
+                                max_size_in_bytes=AnthropicMessagesClient.MAX_IMAGE_SIZE_BYTES,
+                            )
+                            base64_image = encode_base64(temp_file.name, format="JPEG")
                     else:
                         base64_image = encode_base64(image_location, format="JPEG")
@@ -368,14 +391,25 @@ class AnthropicMessagesClient(CachingClient):
                 return response
             raise

-        [8 removed lines not shown]
+        try:
+            cache_key = CachingClient.make_cache_key(
+                {
+                    "completion_index": completion_index,
+                    **raw_request,
+                },
+                request,
+            )
+            response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+        except AnthropicMessagesResponseError:
+            hlog("WARNING: Response has empty content")
+            return RequestResult(
+                success=False,
+                cached=False,
+                error="Anthropic response has empty content",
+                completions=[],
+                embedding=[],
+                error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+            )

         if _is_content_moderation_failure(response):
             hlog(
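The new branch above depends on a resize_image_to_max_file_size helper added to helm.common.images_utils, which this diff does not show. A minimal sketch of how such a helper could behave, assuming Pillow is installed; the function name, the fixed 10% shrink step, and the JPEG handling here are illustrative, not the package's implementation:

# Hedged sketch of a resize-to-byte-budget helper (assumes Pillow); illustrative only,
# not HELM's resize_image_to_max_file_size.
import os

from PIL import Image


def resize_to_max_file_size(src: str, dest: str, max_size_in_bytes: int) -> None:
    """Shrink the image by 10% per iteration until the saved JPEG fits the byte budget."""
    image = Image.open(src).convert("RGB")  # JPEG cannot store an alpha channel
    width, height = image.size
    scale = 1.0
    while True:
        resized = image.resize((max(1, int(width * scale)), max(1, int(height * scale))))
        resized.save(dest, format="JPEG")
        if os.path.getsize(dest) <= max_size_in_bytes or min(resized.size) <= 1:
            return
        scale *= 0.9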
helm/clients/auto_client.py
CHANGED

@@ -5,6 +5,7 @@ from typing import Any, Dict, Mapping, Optional
 from retrying import Attempt, RetryError

 from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
+from helm.benchmark.tokenizer_config_registry import get_tokenizer_config
 from helm.common.file_caches.file_cache import FileCache
 from helm.common.file_caches.local_file_cache import LocalFileCache
 from helm.common.credentials_utils import provide_api_key
@@ -88,6 +89,10 @@ class AutoClient(Client):
                 "location": lambda: self.credentials.get(host_organization + "Location", None),  # VertexAI
                 "hf_auth_token": lambda: self.credentials.get("huggingfaceAuthToken", None),  # HuggingFace
                 "file_cache": lambda: self._get_file_cache(host_organization),  # Text-to-image models
+                "endpoint": lambda: self.credentials.get(host_organization + "Endpoint", None),  # Palmyra
+                "end_of_text_token": lambda: self._get_end_of_text_token(
+                    tokenizer_name=model_deployment.tokenizer_name or model_deployment.name
+                ),
             },
         )
         client = create_object(client_spec)
@@ -213,3 +218,9 @@
         # Initialize `FileCache` for text-to-image model APIs
         local_file_cache_path: str = os.path.join(self.file_storage_path, "output", host_organization)
         return LocalFileCache(local_file_cache_path, file_extension="png")
+
+    def _get_end_of_text_token(self, tokenizer_name: str) -> Optional[str]:
+        tokenizer_config = get_tokenizer_config(tokenizer_name)
+        if tokenizer_config is None:
+            raise ValueError(f"Could not find tokenizer_config for tokenizer {tokenizer_name}")
+        return tokenizer_config.end_of_text_token
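The new "endpoint" and "end_of_text_token" entries above extend a dictionary of lazily evaluated providers: each value is a zero-argument callable that is only invoked if the client constructor being built actually declares that parameter. A hedged, self-contained sketch of that pattern; the inspect-based resolution and FakeClient below are illustrative, not HELM's create_object machinery:

# Hedged sketch of the lazy constant-provider pattern.
import inspect
from typing import Any, Callable, Dict


def build_kwargs(constructor: Callable[..., Any], providers: Dict[str, Callable[[], Any]]) -> Dict[str, Any]:
    # Only call the providers whose names match parameters of the constructor.
    wanted = set(inspect.signature(constructor).parameters)
    return {name: provide() for name, provide in providers.items() if name in wanted}


class FakeClient:  # hypothetical client used only for this example
    def __init__(self, cache_config: str, end_of_text_token: str) -> None:
        self.cache_config = cache_config
        self.end_of_text_token = end_of_text_token


providers: Dict[str, Callable[[], Any]] = {
    "cache_config": lambda: "sqlite-cache",
    "api_key": lambda: "unused-here",  # never called: FakeClient does not declare api_key
    "end_of_text_token": lambda: "</s>",
}
client = FakeClient(**build_kwargs(FakeClient, providers))
assert client.end_of_text_token == "</s>"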
helm/clients/client.py
CHANGED

@@ -39,13 +39,17 @@ class CachingClient(Client):
         """
         if request.random is not None:
             assert "random" not in raw_request
-            [removed line not shown]
+            return {**raw_request, "random": request.random}
         else:
-            [removed line not shown]
-            return cache_key
+            return {**raw_request}


-def truncate_sequence(
+def truncate_sequence(
+    sequence: GeneratedOutput,
+    request: Request,
+    end_of_text_token: Optional[str] = None,
+    print_warning: bool = True,
+) -> GeneratedOutput:
     """
     Certain providers have bugs where they aren't respecting max_tokens,
     stop_sequences and the end of text token, so as a hack, we have to manually
@@ -64,7 +68,11 @@ def truncate_sequence(sequence: GeneratedOutput, request: Request, print_warning
         hlog("WARNING: don't know how to handle echo_prompt and max_tokens > 0, not truncating")
         return sequence

-    [removed line not shown]
+    if end_of_text_token:
+        stop_sequences = request.stop_sequences + [end_of_text_token]
+    else:
+        stop_sequences = request.stop_sequences
+    for stop in stop_sequences:
         # Find `stop` in the text
         try:
             new_text = sequence.text[: sequence.text.index(stop)]
@@ -116,7 +124,12 @@ def truncate_sequence(sequence: GeneratedOutput, request: Request, print_warning


 def truncate_and_tokenize_response_text(
-    text: str,
+    text: str,
+    request: Request,
+    tokenizer: Tokenizer,
+    tokenizer_name: str,
+    end_of_text_token: Optional[str] = None,
+    original_finish_reason: str = "endoftext",
 ) -> GeneratedOutput:
     """Truncate a string-only response to respect stop_sequences and max_tokens.

@@ -139,7 +152,11 @@ def truncate_and_tokenize_response_text(
     if request.echo_prompt:
         raise Exception("truncate_and_tokenize_response_text() does not support requests with echo_prompt = True")

-    [removed line not shown]
+    if end_of_text_token:
+        stop_sequences = request.stop_sequences + [end_of_text_token]
+    else:
+        stop_sequences = request.stop_sequences
+    for stop_sequence in stop_sequences:
         try:
             text = text[: text.index(stop_sequence)]
             finish_reason = "stop"
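Both truncation helpers now treat the tokenizer's end-of-text token as one extra stop sequence. A small self-contained sketch of that rule in isolation (illustrative only, not the full helpers above):

# Hedged sketch: cut the generated text at the earliest matching stop sequence,
# treating the end-of-text token as one more stop.
from typing import List, Optional


def truncate_text(text: str, stop_sequences: List[str], end_of_text_token: Optional[str] = None) -> str:
    stops = stop_sequences + [end_of_text_token] if end_of_text_token else stop_sequences
    for stop in stops:
        index = text.find(stop)
        if index != -1:
            text = text[:index]
    return text


assert truncate_text("Answer: 42</s>garbage", ["\n"], end_of_text_token="</s>") == "Answer: 42"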
helm/clients/cohere_client.py
CHANGED

@@ -1,8 +1,9 @@
 import json
 import requests
-from typing import List
+from typing import List, Optional, Sequence, TypedDict

 from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import (
     wrap_request_time,
     EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
@@ -11,8 +12,13 @@ from helm.common.request import (
     GeneratedOutput,
     Token,
 )
-from .client import CachingClient, truncate_sequence
-from .cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
+from helm.clients.client import CachingClient, truncate_sequence
+from helm.clients.cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
+
+try:
+    import cohere
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["cohere"])


 class CohereClient(CachingClient):
@@ -152,3 +158,92 @@ class CohereClient(CachingClient):
             completions=completions,
             embedding=[],
         )
+
+
+class CohereRawChatRequest(TypedDict):
+    message: str
+    model: Optional[str]
+    preamble: Optional[str]
+    chat_history: Optional[Sequence[cohere.ChatMessage]]
+    temperature: Optional[float]
+    max_tokens: Optional[int]
+    k: Optional[int]
+    p: Optional[float]
+    seed: Optional[float]
+    stop_sequences: Optional[Sequence[str]]
+    frequency_penalty: Optional[float]
+    presence_penalty: Optional[float]
+
+
+def convert_to_raw_chat_request(request: Request) -> CohereRawChatRequest:
+    # TODO: Support chat
+    model = request.model.replace("cohere/", "")
+    return {
+        "message": request.prompt,
+        "model": model,
+        "preamble": None,
+        "chat_history": None,
+        "temperature": request.temperature,
+        "max_tokens": request.max_tokens,
+        "k": request.top_k_per_token,
+        "p": request.top_p,
+        "stop_sequences": request.stop_sequences,
+        "seed": float(request.random) if request.random is not None else None,
+        "frequency_penalty": request.frequency_penalty,
+        "presence_penalty": request.presence_penalty,
+    }
+
+
+class CohereChatClient(CachingClient):
+    """
+    Leverages the chat endpoint: https://docs.cohere.com/reference/chat
+
+    Cohere models will only support chat soon: https://docs.cohere.com/docs/migrating-from-cogenerate-to-cochat
+    """
+
+    def __init__(self, api_key: str, cache_config: CacheConfig):
+        super().__init__(cache_config=cache_config)
+        self.client = cohere.Client(api_key=api_key)
+
+    def make_request(self, request: Request) -> RequestResult:
+        if request.embedding:
+            return EMBEDDING_UNAVAILABLE_REQUEST_RESULT
+        # TODO: Support multiple completions
+        assert request.num_completions == 1, "CohereChatClient only supports num_completions=1"
+        # TODO: Support messages
+        assert not request.messages, "CohereChatClient currently does not support the messages API"
+
+        raw_request: CohereRawChatRequest = convert_to_raw_chat_request(request)
+
+        try:
+
+            def do_it():
+                """
+                Send the request to the Cohere Chat API. Responses will be structured like this:
+                cohere.Chat {
+                    message: What's up?
+                    text: Hey there! How's it going? I'm doing well, thank you for asking 😊.
+                    ...
+                }
+                """
+                raw_response = self.client.chat(**raw_request).dict()
+                assert "text" in raw_response, f"Response does not contain text: {raw_response}"
+                return raw_response

+            response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
+        except (requests.exceptions.RequestException, AssertionError) as e:
+            error: str = f"CohereClient error: {e}"
+            return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
+
+        completions: List[GeneratedOutput] = []
+        completion: GeneratedOutput = GeneratedOutput(text=response["text"], logprob=0.0, tokens=[])
+        completions.append(completion)
+
+        return RequestResult(
+            success=True,
+            cached=cached,
+            request_time=response["request_time"],
+            request_datetime=response["request_datetime"],
+            completions=completions,
+            embedding=[],
+        )
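The new CohereChatClient maps a HELM Request onto keyword arguments for cohere.Client.chat via convert_to_raw_chat_request. A hedged sketch of that mapping using a minimal stand-in dataclass instead of helm.common.request.Request so it runs on its own; the model name below is illustrative:

# Hedged sketch of the Request -> Cohere chat payload mapping (stand-in types only).
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class FakeRequest:  # stand-in carrying only the fields the mapping uses
    model: str
    prompt: str
    temperature: float = 1.0
    max_tokens: int = 100
    top_k_per_token: int = 1
    top_p: float = 1.0
    stop_sequences: List[str] = field(default_factory=list)
    random: Optional[str] = None
    frequency_penalty: float = 0.0
    presence_penalty: float = 0.0


def to_cohere_chat_kwargs(request: FakeRequest) -> dict:
    return {
        "message": request.prompt,
        "model": request.model.replace("cohere/", ""),
        "temperature": request.temperature,
        "max_tokens": request.max_tokens,
        "k": request.top_k_per_token,
        "p": request.top_p,
        "stop_sequences": request.stop_sequences,
        "seed": float(request.random) if request.random is not None else None,
        "frequency_penalty": request.frequency_penalty,
        "presence_penalty": request.presence_penalty,
    }


print(to_cohere_chat_kwargs(FakeRequest(model="cohere/command-r", prompt="What is HELM?")))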
helm/clients/huggingface_client.py
CHANGED

@@ -17,6 +17,7 @@ from helm.common.request import (
     GeneratedOutput,
     Token,
 )
+from helm.tokenizers.tokenizer import Tokenizer
 from .client import CachingClient, truncate_sequence
 from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer, WrappedPreTrainedTokenizer
 from threading import Lock
@@ -53,7 +54,13 @@ class HuggingFaceRequest(TypedDict):
 class HuggingFaceServer:
     """A thin wrapper around a Hugging Face AutoModelForCausalLM for HuggingFaceClient to call."""

-    def __init__(
+    def __init__(
+        self,
+        pretrained_model_name_or_path: str,
+        wrapped_tokenizer: WrappedPreTrainedTokenizer,
+        openvino=False,
+        **kwargs,
+    ):
         if torch.cuda.is_available():
             hlog("CUDA is available, initializing with a GPU...")
             self.device: str = "cuda:0"
@@ -61,13 +68,44 @@ class HuggingFaceServer:
             self.device = "cpu"
         with htrack_block(f"Loading Hugging Face model {pretrained_model_name_or_path}"):
             # WARNING this may fail if your GPU does not have enough memory
-            [7 removed lines not shown]
+            if openvino:
+                """
+                Optimum Intel provides a simple interface to optimize Transformer models and convert them to \
+                OpenVINO™ Intermediate Representation (IR) format to accelerate end-to-end pipelines on \
+                Intel® architectures using OpenVINO™ runtime.
+                """
+                from helm.common.optional_dependencies import handle_module_not_found_error
+
+                try:
+                    from optimum.intel.openvino import OVModelForCausalLM
+                except ModuleNotFoundError as e:
+                    handle_module_not_found_error(e, ["openvino"])
+
+                self.device = "cpu"
+                # Security issue: currently we trust remote code by default.
+                # We retain this temporarily to maintain reverse compatibility.
+                # TODO: Delete if-else and don't set trust_remote_code=True
+                if "trust_remote_code" in kwargs:
+                    self.model = OVModelForCausalLM.from_pretrained(
+                        pretrained_model_name_or_path, export=True, **kwargs
+                    ).to(self.device)
+                else:
+                    self.model = OVModelForCausalLM.from_pretrained(
+                        pretrained_model_name_or_path, export=True, trust_remote_code=True, **kwargs
+                    ).to(self.device)
+            else:
+                # Security issue: currently we trust remote code by default.
+                # We retain this temporarily to maintain reverse compatibility.
+                # TODO: Delete if-else and don't set trust_remote_code=True
+                if "trust_remote_code" in kwargs:
+                    self.model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, **kwargs).to(
+                        self.device
+                    )
+                else:
+                    self.model = AutoModelForCausalLM.from_pretrained(
+                        pretrained_model_name_or_path, trust_remote_code=True, **kwargs
+                    ).to(self.device)
+            self.wrapped_tokenizer = wrapped_tokenizer

     def serve_request(self, raw_request: HuggingFaceRequest) -> Dict:
         with self.wrapped_tokenizer as tokenizer:
@@ -170,7 +208,12 @@ class HuggingFaceServerFactory:
     _servers_lock: Lock = Lock()

     @staticmethod
-    def get_server(
+    def get_server(
+        helm_model_name: str,
+        pretrained_model_name_or_path: str,
+        wrapped_tokenizer: WrappedPreTrainedTokenizer,
+        **kwargs,
+    ) -> Any:
         """
         Checks if the desired HuggingFaceModel is cached. Creates the HuggingFaceModel if it's not cached.
         Returns the HuggingFaceModel.
@@ -182,7 +225,7 @@ class HuggingFaceServerFactory:
             f"for HELM model {helm_model_name} with Hugging Face Transformers"
         ):
             HuggingFaceServerFactory._servers[helm_model_name] = HuggingFaceServer(
-                pretrained_model_name_or_path, **kwargs
+                pretrained_model_name_or_path, wrapped_tokenizer, **kwargs
             )

         return HuggingFaceServerFactory._servers[helm_model_name]
@@ -214,10 +257,25 @@ def _process_huggingface_client_kwargs(raw_kwargs: Dict[str, Any]):


 class HuggingFaceClient(CachingClient):
-    def __init__(
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        tokenizer: Tokenizer,
+        pretrained_model_name_or_path: Optional[str] = None,
+        end_of_text_token: Optional[str] = None,
+        **kwargs,
+    ):
         super().__init__(cache_config=cache_config)
         self._pretrained_model_name_or_path = pretrained_model_name_or_path
+        if not isinstance(tokenizer, HuggingFaceTokenizer):
+            raise ValueError(
+                f"Tokenizer for Hugging Face model {pretrained_model_name_or_path} must be a HuggingFaceTokenizer, "
+                "but instead it is {tokenizer}"
+            )
+        self._wrapped_tokenizer: WrappedPreTrainedTokenizer = tokenizer.get_wrapped_tokenizer()
+        self._tokenizer = tokenizer
         self._kwargs = _process_huggingface_client_kwargs(kwargs)
+        self._end_of_text_token = end_of_text_token

     def make_request(self, request: Request) -> RequestResult:
         # Embedding not supported for this model
@@ -242,6 +300,7 @@ class HuggingFaceClient(CachingClient):
         huggingface_model: HuggingFaceServer = HuggingFaceServerFactory.get_server(
             helm_model_name=request.model,
             pretrained_model_name_or_path=pretrained_model_name_or_path,
+            wrapped_tokenizer=self._wrapped_tokenizer,
             **self._kwargs,
         )

@@ -284,7 +343,7 @@ class HuggingFaceClient(CachingClient):
                 sequence_logprob += logprob

             completion = GeneratedOutput(text=raw_completion["text"], logprob=sequence_logprob, tokens=tokens)
-            completion = truncate_sequence(completion, request)
+            completion = truncate_sequence(completion, request, end_of_text_token=self._end_of_text_token)
             completions.append(completion)

         return RequestResult(
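HuggingFaceServerFactory.get_server now also threads the wrapped tokenizer into the cached HuggingFaceServer. The factory itself follows a one-instance-per-model pattern guarded by a lock; a hedged, generic sketch of that pattern (not HELM's exact code):

# Hedged sketch of a lazy one-instance-per-key factory guarded by a lock.
from threading import Lock
from typing import Any, Callable, Dict


class PerKeyFactory:
    _instances: Dict[str, Any] = {}
    _lock: Lock = Lock()

    @classmethod
    def get(cls, key: str, create: Callable[[], Any]) -> Any:
        with cls._lock:
            if key not in cls._instances:
                cls._instances[key] = create()  # e.g. load a model and tokenizer once
            return cls._instances[key]


first = PerKeyFactory.get("my-model", lambda: object())
second = PerKeyFactory.get("my-model", lambda: object())
assert first is second  # the second call reuses the cached instance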
helm/clients/openai_client.py
CHANGED

@@ -60,8 +60,7 @@ class OpenAIClient(CachingClient):

     def _get_cache_key(self, raw_request: Dict, request: Request):
         cache_key = CachingClient.make_cache_key(raw_request, request)
-        if
-            assert request.multimodal_prompt is not None
+        if request.multimodal_prompt:
             prompt_key: str = generate_uid_for_multimodal_prompt(request.multimodal_prompt)
             cache_key = {**cache_key, "multimodal_prompt": prompt_key}
             del cache_key["messages"]
@@ -103,6 +102,14 @@ class OpenAIClient(CachingClient):

     def _make_chat_request(self, request: Request) -> RequestResult:
         messages: Optional[List[Dict[str, Union[str, Any]]]] = request.messages
+        if (
+            (request.prompt and request.messages)
+            or (request.prompt and request.multimodal_prompt)
+            or (request.messages and request.multimodal_prompt)
+        ):
+            raise ValueError(
+                f"More than one of `prompt`, `messages` and `multimodal_prompt` was set in request: {request}"
+            )
         if request.messages is not None:
             # Checks that all messages have a role and some content
             for message in request.messages:
helm/clients/reka_client.py
ADDED

@@ -0,0 +1,189 @@
+# mypy: check_untyped_defs = False
+import requests
+from typing import Any, Dict, List, Optional, TypedDict
+
+from helm.proxy.retry import NonRetriableException
+from helm.common.cache import CacheConfig
+from helm.common.media_object import TEXT_TYPE
+from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput
+from helm.common.hierarchical_logger import hlog
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.tokenizers.tokenizer import Tokenizer
+from .client import CachingClient, truncate_and_tokenize_response_text
+
+try:
+    import reka
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["reka-api"])
+
+
+class RekaAIRequest(TypedDict):
+    """Data passed between make_request and _send_request. Used as the cache key."""
+
+    model_name: str
+    conversation_history: List[Dict[str, str]]
+    request_output_len: int
+    temperature: float
+    runtime_top_p: float
+    random_seed: Optional[int]
+    stop_words: Optional[List[str]]
+    presence_penalty: float
+    frequency_penalty: float
+
+
+class RekaClient(CachingClient):
+    REKA_CHAT_ROLE_MAPPING: Dict[str, str] = {
+        "user": "human",
+        "assistant": "model",
+    }
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+    ):
+        super().__init__(cache_config=cache_config)
+        self.tokenizer = tokenizer
+        self.tokenizer_name = tokenizer_name
+        self.client = reka
+        self.client.API_KEY = api_key
+
+    def _is_reka_model_engine(self, model_engine: str) -> bool:
+        if (
+            model_engine.startswith("reka-edge")
+            or model_engine.startswith("reka-flash")
+            or model_engine.startswith("reka-core")
+        ):
+            return True
+        else:
+            return False
+
+    def _get_model_for_request(self, request: Request) -> str:
+        return request.model_engine
+
+    def _get_random_seed(self, request: Request, completion_index: int) -> Optional[int]:
+        if request.random is None and completion_index == 0:
+            return None
+
+        # Treat the user's request.random as an integer for the random seed.
+        try:
+            request_random_seed = int(request.random) if request.random is not None else 0
+        except ValueError:
+            raise NonRetriableException("RekaAIClient only supports integer values for request.random")
+
+        # A large prime is used so that the resulting values are unlikely to collide
+        # with request.random values chosen by the user.
+        fixed_large_prime = 1911011
+        completion_index_random_seed = completion_index * fixed_large_prime
+
+        return request_random_seed + completion_index_random_seed
+
+    def _convert_messages_to_reka_chat_history(self, messages: List[Dict[str, Any]]):
+        chat_history = []
+        num_images: int = 0
+        for chat_turn, message in enumerate(messages):
+            role = message["role"]
+            content = message["content"]
+            current_chat_history: Dict[str, Any] = {
+                "type": self.REKA_CHAT_ROLE_MAPPING[role],
+                "text": "",  # text placeholder
+                "media_url": None,
+            }
+            for item in content:
+                if item["type"] == "image_url":
+                    if chat_turn == 0 and num_images == 0:
+                        current_chat_history["media_url"] = item["image_url"]["url"]
+                        num_images += 1
+                    else:
+                        raise ValueError(
+                            f"Only the first message can contain one image. Found image input "
+                            f"in message {chat_turn + 1}"
+                        )
+                elif item["type"] == "text":
+                    current_chat_history["text"] = item["text"]
+                else:
+                    raise ValueError(f"Unrecognized message type {item['type']}")
+            chat_history.append(current_chat_history)
+        return chat_history
+
+    def make_request(self, request: Request) -> RequestResult:
+        completions: List[GeneratedOutput] = []
+        messages: Optional[List[Dict[str, Any]]] = request.messages
+        reka_chat_history: List[Dict[str, Any]]
+        if messages is not None:
+            # Checks that all messages have a role and some content
+            for message in messages:
+                if not message.get("role") or not message.get("content"):
+                    raise ValueError("All messages must have a role and content")
+            # Checks that the last role is "user"
+            if messages[-1]["role"] != "user":
+                raise ValueError("Last message must have role 'user'")
+            if request.prompt != "":
+                hlog("WARNING: Since message is set, prompt will be ignored")
+            reka_chat_history = self._convert_messages_to_reka_chat_history(messages)
+        else:
+            current_chat_history: Dict[str, Any] = {
+                "type": "human",
+                "text": "",
+                "media_url": None,
+            }
+            if request.multimodal_prompt is not None:
+                for media_object in request.multimodal_prompt.media_objects:
+                    if media_object.is_type("image") and media_object.location:
+                        from helm.common.images_utils import encode_base64
+
+                        base64_image: str = encode_base64(media_object.location)
+                        current_chat_history["media_url"] = f"data:image/jpeg;base64,{base64_image}"
+                    elif media_object.is_type(TEXT_TYPE):
+                        if media_object.text is None:
+                            raise ValueError("MediaObject of text type has missing text field value")
+                        current_chat_history["text"] = media_object.text
+                    else:
+                        raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+
+            else:
+                current_chat_history["text"] = request.prompt
+            reka_chat_history = [current_chat_history]
+
+        # `num_completions` is not supported, so instead make `num_completions` separate requests.
+        for completion_index in range(request.num_completions):
+            try:
+                raw_request: RekaAIRequest = {
+                    "model_name": self._get_model_for_request(request),
+                    "conversation_history": reka_chat_history,  # we only use chat_history as the input
+                    "request_output_len": request.max_tokens,
+                    "temperature": request.temperature,
+                    "random_seed": self._get_random_seed(request, completion_index),
+                    "stop_words": request.stop_sequences or None,  # API doesn't like empty list
+                    "runtime_top_p": request.top_p,
+                    "presence_penalty": request.presence_penalty,
+                    "frequency_penalty": request.frequency_penalty,
+                }
+
+                def do_it() -> Dict[str, Any]:
+                    return self.client.chat(**raw_request)
+
+                response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
+            except (requests.exceptions.RequestException, AssertionError) as e:
+                error: str = f"RekaClient error: {e}"
+                return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
+
+            response_message: Dict[str, Any] = response
+            assert response_message["type"] == "model"
+            response_text: str = response_message["text"]
+
+            # The Reka API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
+            text: str = request.prompt + response_text if request.echo_prompt else response_text
+            completion = truncate_and_tokenize_response_text(text, request, self.tokenizer, self.tokenizer_name)
+            completions.append(completion)
+
+        return RequestResult(
+            success=True,
+            cached=cached,
+            request_time=response["request_time"],
+            request_datetime=response.get("request_datetime"),
+            completions=completions,
+            embedding=[],
+        )
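The new RekaClient converts OpenAI-style chat messages into Reka conversation turns using the "user" -> "human" and "assistant" -> "model" mapping above. A hedged sketch of that conversion, simplified to plain-string text content (the real client also handles image items and only allows an image in the first turn):

# Hedged sketch of the role/turn conversion, text-only.
from typing import Any, Dict, List

REKA_CHAT_ROLE_MAPPING = {"user": "human", "assistant": "model"}


def to_reka_chat_history(messages: List[Dict[str, str]]) -> List[Dict[str, Any]]:
    return [
        {"type": REKA_CHAT_ROLE_MAPPING[message["role"]], "text": message["content"], "media_url": None}
        for message in messages
    ]


history = to_reka_chat_history(
    [
        {"role": "user", "content": "Name a benchmark."},
        {"role": "assistant", "content": "HELM."},
        {"role": "user", "content": "What does it stand for?"},
    ]
)
assert history[0]["type"] == "human" and history[1]["type"] == "model"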