PyPI - crfm-helm - Versions diffs - 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (184) hide show

helm/clients/nvidia_nim_client.py ADDED Viewed

@@ -0,0 +1,35 @@
+from typing import Optional
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.common.request import Request
+from helm.tokenizers.tokenizer import Tokenizer
+class NvidiaNimClient(OpenAIClient):
+    BASE_URL = "https://integrate.api.nvidia.com/v1"
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+    ):
+        self.tokenizer = tokenizer
+        self.tokenizer_name = tokenizer_name
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key=api_key,
+            org_id=None,
+            base_url=NvidiaNimClient.BASE_URL,
+        )
+    def _get_model_for_request(self, request: Request) -> str:
+        return request.model
+    def _is_chat_model_engine(self, model_engine: str) -> bool:
+        return True

helm/clients/openai_client.py CHANGED Viewed

@@ -12,8 +12,8 @@ from helm.common.tokenization_request import (
     TokenizationRequest,
     TokenizationRequestResult,
 )
-from helm.tokenizers.tokenizer import Tokenizer
 from .client import CachingClient, truncate_sequence, generate_uid_for_multimodal_prompt
+from helm.tokenizers.tokenizer import Tokenizer
 try:
     import openai
@@ -132,6 +132,7 @@ class OpenAIClient(CachingClient):
             content: Union[str, List[Union[str, Any]]]
             if request.multimodal_prompt is not None:
                 content = []
+                request.validate()
                 for media_object in request.multimodal_prompt.media_objects:
                     if media_object.is_type("image") and media_object.location:
                         from helm.common.images_utils import encode_base64
@@ -140,8 +141,6 @@ class OpenAIClient(CachingClient):
                         image_object: Dict[str, str] = {"url": f"data:image/jpeg;base64,{base64_image}"}
                         content.append({"type": "image_url", "image_url": image_object})
                     elif media_object.is_type(TEXT_TYPE):
-                        if media_object.text is None:
-                            raise ValueError("MediaObject of text type has missing text field value")
                         content.append({"type": media_object.type, "text": media_object.text})
                     else:
                         raise ValueError(f"Unrecognized MediaObject type {media_object.type}")

helm/clients/palmyra_client.py CHANGED Viewed

@@ -3,6 +3,7 @@ import json
 import requests
 from typing import Any, Dict, List
+from helm.clients.openai_client import OpenAIClient
 from helm.common.cache import CacheConfig
 from helm.common.hierarchical_logger import hlog
 from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token, ErrorFlags
@@ -142,3 +143,27 @@ class PalmyraClient(CachingClient):
             completions=completions,
             embedding=[],
         )
+class PalmyraChatClient(OpenAIClient):
+    """Sends request to a Palmyra model using a OpenAI-compatible Chat API."""
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: str,
+        base_url: str,
+    ):
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key=api_key,
+            org_id=None,
+            base_url=base_url,
+        )
+    def _is_chat_model_engine(self, model_engine: str) -> bool:
+        return True

helm/clients/perspective_api_client.py CHANGED Viewed

@@ -4,16 +4,21 @@ from dataclasses import asdict
 from typing import Any, List, Dict, Optional
 from dacite import from_dict
-from googleapiclient import discovery
-from googleapiclient.errors import BatchError, HttpError
-from googleapiclient.http import BatchHttpRequest
-from httplib2 import HttpLib2Error
 from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
+from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.proxy.retry import NonRetriableException
 from helm.common.cache import Cache, CacheConfig
 from helm.common.perspective_api_request import ToxicityAttributes, PerspectiveAPIRequest, PerspectiveAPIRequestResult
-from google.auth.exceptions import DefaultCredentialsError
+try:
+    from googleapiclient import discovery
+    from googleapiclient.errors import BatchError, HttpError
+    from googleapiclient.http import BatchHttpRequest
+    from httplib2 import HttpLib2Error
+    from google.auth.exceptions import DefaultCredentialsError
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["metrics"])
 class PerspectiveAPIClientCredentialsError(NonRetriableException):

helm/clients/test_client.py CHANGED Viewed

@@ -23,30 +23,28 @@ def test_truncate_sequence():
     # echo_prompt = True, nothing gets truncated
     truncate_sequence_helper(
         ["a", "b", "c"],
-        Request(
-            model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", prompt="abc", echo_prompt=True
-        ),
+        Request(model="openai/gpt2", model_deployment="huggingface/gpt2", prompt="abc", echo_prompt=True),
         ["a", "b", "c"],
     )
     # Nothing gets truncated
     truncate_sequence_helper(
         ["hello", " world"],
-        Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", stop_sequences=["#"]),
+        Request(model="openai/gpt2", model_deployment="huggingface/gpt2", stop_sequences=["#"]),
         ["hello", " world"],
     )
     # Truncate using stop sequences
     truncate_sequence_helper(
         ["hello", " world", "\n", "what"],
-        Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", stop_sequences=["\n"]),
+        Request(model="openai/gpt2", model_deployment="huggingface/gpt2", stop_sequences=["\n"]),
         ["hello", " world"],
     )
     # Truncate using max tokens
     truncate_sequence_helper(
         ["a", "b", "c"],
-        Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", max_tokens=2),
+        Request(model="openai/gpt2", model_deployment="huggingface/gpt2", max_tokens=2),
         ["a", "b"],
     )

helm/clients/vision_language/open_flamingo_client.py CHANGED Viewed

@@ -82,13 +82,12 @@ class OpenFlamingoClient(CachingClient):
         # Build the prompt
         prompt_text: str = ""
         images: List[Image.Image] = []
+        request.validate()
         for media_object in request.multimodal_prompt.media_objects:
             if media_object.is_type("image") and media_object.location:
                 images.append(open_image(media_object.location))
                 prompt_text += self.IMAGE_TOKEN
             elif media_object.is_type(TEXT_TYPE):
-                if media_object.text is None:
-                    raise ValueError("MediaObject of text type has missing text field value")
                 prompt_text += media_object.text
             else:
                 raise ValueError(f"Unrecognized MediaObject type {media_object.type}")

helm/clients/vision_language/palmyra_vision_client.py CHANGED Viewed

@@ -6,13 +6,19 @@ import requests
 from helm.common.cache import CacheConfig
 from helm.common.images_utils import encode_base64
 from helm.common.media_object import TEXT_TYPE
-from helm.common.request import Request, RequestResult, GeneratedOutput
+from helm.common.request import Request, RequestResult, GeneratedOutput, ErrorFlags
 from helm.common.request import wrap_request_time
 from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt, truncate_and_tokenize_response_text
 from helm.tokenizers.tokenizer import Tokenizer
+class PalmyraVisionContentBlockedError(Exception):
+    pass
 class PalmyraVisionClient(CachingClient):
+    CONTENT_BLOCKED_ERROR: str = "fail.input.content.moderation"
     def __init__(self, tokenizer: Tokenizer, tokenizer_name: str, endpoint: str, cache_config: CacheConfig):
         super().__init__(cache_config)
         self.tokenizer: Tokenizer = tokenizer
@@ -49,17 +55,19 @@ class PalmyraVisionClient(CachingClient):
                 response = requests.post(
                     self.endpoint, headers={"Content-Type": "application/json"}, data=json.dumps({"parts": prompt})
                 )
-                if response.status_code != 200:
-                    curl_command: str = (
-                        f"curl --location '{self.endpoint}' --header 'Content-Type: application/json' "
-                        f"--data '{json.dumps({'parts': prompt})}'"
-                    )
-                    assert False, f"Got status code {response.status_code}. Try {curl_command}"
                 json_response = json.loads(response.text)
-                assert (
-                    "choices" in json_response and "errors" not in json_response
-                ), f"Invalid response: {response.text}"
+                # Check for content blocked error
+                if (
+                    "errors" in json_response
+                    and "tpe" in json_response
+                    and json_response["tpe"] == self.CONTENT_BLOCKED_ERROR
+                ):
+                    raise PalmyraVisionContentBlockedError(json_response["errors"])
+                # Hard fail if the `choices` is missing from the response
+                assert "choices" in json_response, f"Invalid response: {response.text}"
                 return json_response
             cache_key = CachingClient.make_cache_key(
@@ -67,8 +75,15 @@ class PalmyraVisionClient(CachingClient):
                 request=request,
             )
             result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
-        except RuntimeError as ex:
-            return RequestResult(success=False, cached=False, error=str(ex), completions=[], embedding=[])
+        except PalmyraVisionContentBlockedError as ex:
+            return RequestResult(
+                success=False,
+                cached=False,
+                error=f"Content blocked: {str(ex)}",
+                completions=[],
+                embedding=[],
+                error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+            )
         # The internal endpoint doesn't support any other parameters, so we have to truncate ourselves
         completions: List[GeneratedOutput] = [

helm/common/images_utils.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from hashlib import md5
 import base64
 import io
 import os
@@ -44,6 +45,11 @@ def encode_base64(image_location: str, format="JPEG") -> str:
     return base64.b64encode(image_file.getvalue()).decode("ascii")
+def generate_hash(image: Image.Image) -> str:
+    """Generates a hash for the image."""
+    return md5(image.tobytes()).hexdigest()
 def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None) -> None:
     """
     Copies the image file from `src` path to `dest` path. If dimensions `width` and `height`

helm/common/mongo_key_value_store.py CHANGED Viewed

@@ -85,4 +85,5 @@ class MongoKeyValueStore(KeyValueStore):
         self._collection.bulk_write(operations)
     def remove(self, key: Dict) -> None:
-        self._collection.delete_one(key)
+        query = {self._REQUEST_KEY: self._canonicalize_key(key)}
+        self._collection.delete_one(query)

helm/common/request.py CHANGED Viewed

@@ -72,6 +72,22 @@ class Request:
     image_generation_parameters: Optional[ImageGenerationParameters] = None
     """Parameters for image generation."""
+    def validate(self):
+        if (
+            (self.messages and self.prompt)
+            or (self.messages and self.multimodal_prompt)
+            or (self.prompt and self.multimodal_prompt)
+        ):
+            raise ValueError("Exactly one of the messages, prompt, multimodal_prompt fields should be set")
+        if self.multimodal_prompt:
+            for media_object in self.multimodal_prompt.media_objects:
+                if media_object.content_type == "text" and media_object.text is None:
+                    raise ValueError("Media object with text content type must have text set")
+                if media_object.content_type == "image" and media_object.location is None:
+                    raise ValueError("Media object with image content type must have location set")
     @property
     def model_host(self) -> str:
         """Returns the model host (referring to the deployment).

crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

Potentially problematic release.

crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl