crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional, TypedDict, Union, cast
2
2
  import json
3
3
  import requests
4
4
  import time
@@ -6,13 +6,14 @@ import urllib.parse
6
6
 
7
7
  from helm.common.cache import CacheConfig
8
8
  from helm.common.hierarchical_logger import htrack_block, hlog
9
+ from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
9
10
  from helm.common.optional_dependencies import handle_module_not_found_error
10
11
  from helm.common.request import (
11
12
  wrap_request_time,
12
13
  EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
13
14
  Request,
14
15
  RequestResult,
15
- Sequence,
16
+ GeneratedOutput,
16
17
  Token,
17
18
  ErrorFlags,
18
19
  )
@@ -20,16 +21,30 @@ from helm.common.tokenization_request import (
20
21
  TokenizationRequest,
21
22
  TokenizationRequestResult,
22
23
  )
23
- from helm.proxy.tokenizers.tokenizer import Tokenizer
24
- from .client import CachingClient, truncate_sequence
24
+ from helm.proxy.retry import NonRetriableException
25
+ from helm.tokenizers.tokenizer import Tokenizer
26
+ from helm.clients.client import CachingClient, truncate_sequence, truncate_and_tokenize_response_text
25
27
 
26
28
  try:
27
- import anthropic
29
+ from anthropic import Anthropic, BadRequestError
30
+ from anthropic.types import MessageParam
31
+ from anthropic.types.image_block_param import ImageBlockParam
32
+ from anthropic.types.text_block_param import TextBlockParam
28
33
  import websocket
29
34
  except ModuleNotFoundError as e:
30
35
  handle_module_not_found_error(e, ["anthropic"])
31
36
 
32
37
 
38
+ class AnthropicCompletionRequest(TypedDict):
39
+ prompt: str
40
+ stop_sequences: List[str]
41
+ model: str
42
+ max_tokens_to_sample: int
43
+ temperature: float
44
+ top_p: float
45
+ top_k: int
46
+
47
+
33
48
  class AnthropicClient(CachingClient):
34
49
  """
35
50
  Client for the Anthropic models (https://arxiv.org/abs/2204.05862).
@@ -56,15 +71,19 @@ class AnthropicClient(CachingClient):
56
71
  ADDITIONAL_TOKENS: int = 5
57
72
  PROMPT_ANSWER_START: str = "The answer is "
58
73
 
59
- def __init__(self, tokenizer: Tokenizer, cache_config: CacheConfig, api_key: Optional[str] = None):
60
- super().__init__(cache_config=cache_config, tokenizer=tokenizer)
74
+ def __init__(
75
+ self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig, api_key: Optional[str] = None
76
+ ):
77
+ super().__init__(cache_config=cache_config)
78
+ self.tokenizer = tokenizer
79
+ self.tokenizer_name = tokenizer_name
61
80
  self.api_key: Optional[str] = api_key
62
- self._client = anthropic.Client(api_key) if api_key else None
81
+ self.client = Anthropic(api_key=api_key)
63
82
 
64
- def _send_request(self, raw_request: Dict[str, Any]) -> Dict[str, Any]:
83
+ def _send_request(self, raw_request: AnthropicCompletionRequest) -> Dict[str, Any]:
65
84
  if self.api_key is None:
66
85
  raise Exception("API key is not set. Please set it in the HELM config file.")
67
- result = self._client.completion(**raw_request)
86
+ result = self.client.completions.create(**raw_request).model_dump()
68
87
  assert "error" not in result, f"Request failed with error: {result['error']}"
69
88
  return result
70
89
 
@@ -99,7 +118,7 @@ class AnthropicClient(CachingClient):
99
118
  if request.max_tokens == 0 and not request.echo_prompt:
100
119
  raise ValueError("echo_prompt must be True when max_tokens=0.")
101
120
 
102
- raw_request = {
121
+ raw_request: AnthropicCompletionRequest = {
103
122
  "prompt": request.prompt,
104
123
  "stop_sequences": request.stop_sequences,
105
124
  "model": request.model_engine,
@@ -109,7 +128,7 @@ class AnthropicClient(CachingClient):
109
128
  "top_k": request.top_k_per_token,
110
129
  }
111
130
 
112
- completions: List[Sequence] = []
131
+ completions: List[GeneratedOutput] = []
113
132
 
114
133
  # `num_completions` is not supported, so instead make `num_completions` separate requests.
115
134
  for completion_index in range(request.num_completions):
@@ -164,15 +183,13 @@ class AnthropicClient(CachingClient):
164
183
  # The Anthropic API doesn't return us tokens or logprobs, so we tokenize ourselves.
165
184
  tokenization_result: TokenizationRequestResult = self.tokenizer.tokenize(
166
185
  # Anthropic uses their own tokenizer
167
- TokenizationRequest(text, tokenizer=request.model_engine)
186
+ TokenizationRequest(text, tokenizer=self.tokenizer_name)
168
187
  )
169
188
 
170
189
  # Log probs are currently not supported by the Anthropic API, so set to 0 for now.
171
- tokens: List[Token] = [
172
- Token(text=str(text), logprob=0, top_logprobs={}) for text in tokenization_result.raw_tokens
173
- ]
190
+ tokens: List[Token] = [Token(text=str(text), logprob=0) for text in tokenization_result.raw_tokens]
174
191
 
175
- completion = Sequence(text=response["completion"], logprob=0, tokens=tokens)
192
+ completion = GeneratedOutput(text=response["completion"], logprob=0, tokens=tokens)
176
193
  # See NOTE() in _filter_completion() to understand why warnings are printed for truncation.
177
194
  # TODO(#1512): Fix this with post-processing.
178
195
  sequence = truncate_sequence(completion, request, print_warning=True)
@@ -188,6 +205,179 @@ class AnthropicClient(CachingClient):
188
205
  )
189
206
 
190
207
 
208
+ def _is_content_moderation_failure(response: Dict) -> bool:
209
+ """Return whether a response failed because of the content moderation filter."""
210
+ if (
211
+ "error" in response
212
+ and "message" in response["error"]
213
+ and response["error"]["message"] == "Output blocked by content filtering policy"
214
+ ):
215
+ hlog(f"Anthropic - output blocked by content filtering policy: {response}")
216
+ return True
217
+ return False
218
+
219
+
220
+ class AnthropicMessagesRequest(TypedDict, total=False):
221
+ messages: List[MessageParam]
222
+ model: str
223
+ stop_sequences: List[str]
224
+ system: str
225
+ max_tokens: int
226
+ temperature: float
227
+ top_k: int
228
+ top_p: float
229
+
230
+
231
+ class AnthropicMessagesRequestError(NonRetriableException):
232
+ pass
233
+
234
+
235
+ class AnthropicMessagesResponseError(Exception):
236
+ pass
237
+
238
+
239
+ class AnthropicMessagesClient(CachingClient):
240
+ # Source: https://docs.anthropic.com/claude/docs/models-overview
241
+ MAX_OUTPUT_TOKENS = 4096
242
+
243
+ def __init__(
244
+ self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig, api_key: Optional[str] = None
245
+ ):
246
+ super().__init__(cache_config=cache_config)
247
+ self.tokenizer = tokenizer
248
+ self.tokenizer_name = tokenizer_name
249
+ self.client = Anthropic(api_key=api_key)
250
+ self.api_key: Optional[str] = api_key
251
+
252
+ def make_request(self, request: Request) -> RequestResult:
253
+ if request.max_tokens > AnthropicMessagesClient.MAX_OUTPUT_TOKENS:
254
+ raise AnthropicMessagesRequestError(
255
+ f"Request.max_tokens must be <= {AnthropicMessagesClient.MAX_OUTPUT_TOKENS}"
256
+ )
257
+
258
+ messages: List[MessageParam] = []
259
+ system_message: Optional[MessageParam] = None
260
+
261
+ if request.messages is not None:
262
+ # TODO(#2439): Refactor out Request validation
263
+ if request.multimodal_prompt is not None or request.prompt:
264
+ raise AnthropicMessagesRequestError(
265
+ "Exactly one of Request.messages, Request.prompt or Request.multimodel_prompt should be set"
266
+ )
267
+ messages = cast(List[MessageParam], request.messages)
268
+ if messages[0]["role"] == "system":
269
+ system_message = messages[0]
270
+ messages = messages[1:]
271
+
272
+ elif request.multimodal_prompt is not None:
273
+ # TODO(#2439): Refactor out Request validation
274
+ if request.messages is not None or request.prompt:
275
+ raise AnthropicMessagesRequestError(
276
+ "Exactly one of Request.messages, Request.prompt or Request.multimodel_prompt should be set"
277
+ )
278
+ blocks: List[Union[TextBlockParam, ImageBlockParam]] = []
279
+ for media_object in request.multimodal_prompt.media_objects:
280
+ if media_object.is_type(IMAGE_TYPE):
281
+ # TODO(#2439): Refactor out Request validation
282
+ if not media_object.location:
283
+ raise Exception("MediaObject of image type has missing location field value")
284
+
285
+ from helm.common.images_utils import encode_base64
286
+
287
+ base64_image: str = encode_base64(media_object.location, format="JPEG")
288
+ image_block: ImageBlockParam = {
289
+ "type": "image",
290
+ "source": {
291
+ "type": "base64",
292
+ "media_type": "image/jpeg",
293
+ "data": base64_image,
294
+ },
295
+ }
296
+ blocks.append(image_block)
297
+ if media_object.is_type(TEXT_TYPE):
298
+ # TODO(#2439): Refactor out Request validation
299
+ if media_object.text is None:
300
+ raise ValueError("MediaObject of text type has missing text field value")
301
+ text_block: TextBlockParam = {
302
+ "type": "text",
303
+ "text": media_object.text,
304
+ }
305
+ blocks.append(text_block)
306
+ messages = [{"role": "user", "content": blocks}]
307
+
308
+ else:
309
+ messages = [{"role": "user", "content": request.prompt}]
310
+
311
+ raw_request: AnthropicMessagesRequest = {
312
+ "messages": messages,
313
+ "model": request.model_engine,
314
+ "stop_sequences": request.stop_sequences,
315
+ "max_tokens": request.max_tokens,
316
+ "temperature": request.temperature,
317
+ "top_p": request.top_p,
318
+ "top_k": request.top_k_per_token,
319
+ }
320
+ if system_message is not None:
321
+ raw_request["system"] = cast(str, system_message["content"])
322
+ completions: List[GeneratedOutput] = []
323
+
324
+ # `num_completions` is not supported, so instead make `num_completions` separate requests.
325
+ for completion_index in range(request.num_completions):
326
+
327
+ def do_it() -> Dict[str, Any]:
328
+ try:
329
+ result = self.client.messages.create(**raw_request).model_dump()
330
+ if "content" not in result or not result["content"]:
331
+ raise AnthropicMessagesResponseError(f"Anthropic response has empty content: {result}")
332
+ elif "text" not in result["content"][0]:
333
+ raise AnthropicMessagesResponseError(f"Anthropic response has non-text content: {result}")
334
+ return result
335
+ except BadRequestError as e:
336
+ response = e.response.json()
337
+ if _is_content_moderation_failure(response):
338
+ return response
339
+ raise
340
+
341
+ cache_key = CachingClient.make_cache_key(
342
+ {
343
+ "completion_index": completion_index,
344
+ **raw_request,
345
+ },
346
+ request,
347
+ )
348
+ response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
349
+
350
+ if _is_content_moderation_failure(response):
351
+ hlog(
352
+ f"WARNING: Returning empty request for {request.model_deployment} "
353
+ "due to content moderation filter"
354
+ )
355
+ return RequestResult(
356
+ success=False,
357
+ cached=cached,
358
+ error=response["error"]["message"],
359
+ completions=[],
360
+ embedding=[],
361
+ error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
362
+ request_time=response["request_time"],
363
+ request_datetime=response["request_datetime"],
364
+ )
365
+
366
+ completion = truncate_and_tokenize_response_text(
367
+ response["content"][0]["text"], request, self.tokenizer, self.tokenizer_name, original_finish_reason=""
368
+ )
369
+ completions.append(completion)
370
+
371
+ return RequestResult(
372
+ success=True,
373
+ cached=cached,
374
+ request_time=response["request_time"],
375
+ request_datetime=response["request_datetime"],
376
+ completions=completions,
377
+ embedding=[],
378
+ )
379
+
380
+
191
381
  class AnthropicRequestError(Exception):
192
382
  pass
193
383
 
@@ -239,9 +429,9 @@ class AnthropicLegacyClient(CachingClient):
239
429
  hlog(f"Invalid logprobs response: {raw_response}")
240
430
  return False
241
431
 
242
- def __init__(self, api_key: str, tokenizer: Tokenizer, cache_config: CacheConfig):
432
+ def __init__(self, api_key: str, cache_config: CacheConfig):
243
433
  hlog("This client is deprecated. Please use AnthropicClient instead.")
244
- super().__init__(cache_config=cache_config, tokenizer=tokenizer)
434
+ super().__init__(cache_config=cache_config)
245
435
  self.api_key = api_key
246
436
 
247
437
  def make_request(self, request: Request) -> RequestResult:
@@ -249,7 +439,7 @@ class AnthropicLegacyClient(CachingClient):
249
439
  if request.embedding:
250
440
  return EMBEDDING_UNAVAILABLE_REQUEST_RESULT
251
441
  # Validate the fields of `Request`
252
- if request.model != "anthropic/stanford-online-all-v4-s3":
442
+ if request.model_engine != "stanford-online-all-v4-s3":
253
443
  raise ValueError(f"Invalid model: {request.model}")
254
444
  if request.max_tokens > AnthropicLegacyClient.MAX_COMPLETION_LENGTH:
255
445
  raise ValueError(
@@ -390,7 +580,7 @@ class AnthropicLegacyClient(CachingClient):
390
580
 
391
581
  # Since Anthropic doesn't support multiple completions, we have to manually call it multiple times,
392
582
  # and aggregate the results into `completions` and `request_time`.
393
- completions: List[Sequence] = []
583
+ completions: List[GeneratedOutput] = []
394
584
  all_cached = True
395
585
  request_time = 0
396
586
  request_datetime: Optional[int] = None
@@ -423,8 +613,7 @@ class AnthropicLegacyClient(CachingClient):
423
613
  for text, token_logprob, all_logprobs, all_tokens in zip(
424
614
  log_probs["tokens"], log_probs["logprobs"], log_probs["topk_logprobs"], log_probs["topk_tokens"]
425
615
  ):
426
- top_logprobs: Dict[str, float] = {text: logprob for text, logprob in zip(all_tokens, all_logprobs)}
427
- tokens.append(Token(text=text, logprob=token_logprob, top_logprobs=top_logprobs))
616
+ tokens.append(Token(text=text, logprob=token_logprob))
428
617
  sequence_logprob += token_logprob
429
618
 
430
619
  finish_reason: str = response["stop_reason"]
@@ -432,7 +621,7 @@ class AnthropicLegacyClient(CachingClient):
432
621
  if finish_reason == AnthropicLegacyClient.STOP_SEQUENCE_STOP_REASON:
433
622
  finish_reason = "stop"
434
623
 
435
- completion = Sequence(
624
+ completion = GeneratedOutput(
436
625
  text=response["text"],
437
626
  logprob=sequence_logprob,
438
627
  tokens=tokens,
@@ -0,0 +1,215 @@
1
+ from dataclasses import replace
2
+ import os
3
+ from typing import Any, Dict, Mapping, Optional
4
+
5
+ from retrying import Attempt, RetryError
6
+
7
+ from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
8
+ from helm.common.file_caches.file_cache import FileCache
9
+ from helm.common.file_caches.local_file_cache import LocalFileCache
10
+ from helm.common.credentials_utils import provide_api_key
11
+ from helm.common.cache_backend_config import CacheBackendConfig, CacheConfig
12
+ from helm.common.hierarchical_logger import hlog
13
+ from helm.common.object_spec import create_object, inject_object_spec_args
14
+ from helm.common.request import Request, RequestResult
15
+ from helm.clients.client import Client
16
+ from helm.clients.moderation_api_client import ModerationAPIClient
17
+ from helm.proxy.critique.critique_client import CritiqueClient
18
+ from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
19
+ from helm.proxy.retry import NonRetriableException, retry_request
20
+ from helm.tokenizers.auto_tokenizer import AutoTokenizer
21
+
22
+
23
+ class AuthenticationError(NonRetriableException):
24
+ pass
25
+
26
+
27
+ class AutoClient(Client):
28
+ """Automatically dispatch to the proper `Client` based on the model deployment name."""
29
+
30
+ def __init__(
31
+ self, credentials: Mapping[str, Any], file_storage_path: str, cache_backend_config: CacheBackendConfig
32
+ ):
33
+ self._auto_tokenizer = AutoTokenizer(credentials, cache_backend_config)
34
+ self.credentials = credentials
35
+ self.file_storage_path = file_storage_path
36
+ self.cache_backend_config = cache_backend_config
37
+ self.clients: Dict[str, Client] = {}
38
+ self._critique_client: Optional[CritiqueClient] = None
39
+ hlog(f"AutoClient: file_storage_path = {file_storage_path}")
40
+ hlog(f"AutoClient: cache_backend_config = {cache_backend_config}")
41
+
42
+ def _get_client(self, model_deployment_name: str) -> Client:
43
+ """Return a client based on the model, creating it if necessary."""
44
+ # First try to find the client in the cache
45
+ client: Optional[Client] = self.clients.get(model_deployment_name)
46
+ if client is not None:
47
+ return client
48
+
49
+ # Otherwise, create the client
50
+ model_deployment: ModelDeployment = get_model_deployment(model_deployment_name)
51
+ if model_deployment:
52
+ # Perform dependency injection to fill in remaining arguments.
53
+ # Dependency injection is needed here for these reasons:
54
+ #
55
+ # 1. Different clients have different parameters. Dependency injection provides arguments
56
+ # that match the parameters of the client.
57
+ # 2. Some arguments, such as the tokenizer, are not static data objects that can be
58
+ # in the users configuration file. Instead, they have to be constructed dynamically at
59
+ # runtime.
60
+ # 3. The providers must be lazily-evaluated, because eager evaluation can result in an
61
+ # exception. For instance, some clients do not require an API key, so trying to fetch
62
+ # the API key from configuration eagerly will result in an exception because the user
63
+ # will not have configured an API key.
64
+
65
+ # Prepare a cache
66
+ host_organization: str = model_deployment.host_organization
67
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config(host_organization)
68
+
69
+ client_spec = inject_object_spec_args(
70
+ model_deployment.client_spec,
71
+ constant_bindings={
72
+ "cache_config": cache_config,
73
+ "tokenizer_name": model_deployment.tokenizer_name,
74
+ },
75
+ provider_bindings={
76
+ "api_key": lambda: provide_api_key(self.credentials, host_organization, model_deployment_name),
77
+ "tokenizer": lambda: self._auto_tokenizer._get_tokenizer(
78
+ tokenizer_name=model_deployment.tokenizer_name or model_deployment.name
79
+ ),
80
+ "org_id": lambda: self.credentials.get(
81
+ host_organization + "OrgId", None
82
+ ), # OpenAI, GooseAI, Microsoft
83
+ "moderation_api_client": lambda: self.get_moderation_api_client(), # OpenAI DALL-E
84
+ "lock_file_path": lambda: os.path.join(
85
+ self.file_storage_path, f"{host_organization}.lock"
86
+ ), # Microsoft
87
+ "project_id": lambda: self.credentials.get(host_organization + "ProjectId", None), # VertexAI
88
+ "location": lambda: self.credentials.get(host_organization + "Location", None), # VertexAI
89
+ "hf_auth_token": lambda: self.credentials.get("huggingfaceAuthToken", None), # HuggingFace
90
+ "file_cache": lambda: self._get_file_cache(host_organization), # Text-to-image models
91
+ },
92
+ )
93
+ client = create_object(client_spec)
94
+ else:
95
+ raise ValueError(f"Could not find client for model deployment: {model_deployment_name}")
96
+
97
+ # Cache the client
98
+ self.clients[model_deployment_name] = client
99
+
100
+ return client
101
+
102
+ def make_request(self, request: Request) -> RequestResult:
103
+ """
104
+ Dispatch based on the name of the model (e.g., openai/davinci).
105
+ Retries if request fails.
106
+ """
107
+
108
+ # TODO: need to revisit this because this swallows up any exceptions that are raised.
109
+ @retry_request
110
+ def make_request_with_retry(client: Client, request: Request) -> RequestResult:
111
+ return client.make_request(request)
112
+
113
+ client: Client = self._get_client(request.model_deployment)
114
+
115
+ try:
116
+ return make_request_with_retry(client=client, request=request)
117
+ except RetryError as e:
118
+ last_attempt: Attempt = e.last_attempt
119
+ retry_error: str = (
120
+ f"Failed to make request to {request.model_deployment} after retrying "
121
+ f"{last_attempt.attempt_number} times"
122
+ )
123
+ hlog(retry_error)
124
+
125
+ # Notify our user that we failed to make the request even after retrying.
126
+ return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
127
+
128
+ def get_gcs_client(self):
129
+ from .gcs_client import GCSClient
130
+
131
+ bucket_name: str = self.credentials["gcsBucketName"]
132
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("gcs")
133
+ return GCSClient(bucket_name, cache_config)
134
+
135
+ def get_nudity_check_client(self):
136
+ from helm.clients.image_generation.nudity_check_client import NudityCheckClient
137
+
138
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("nudity")
139
+ return NudityCheckClient(cache_config)
140
+
141
+ def get_clip_score_client(self):
142
+ from .clip_score_client import CLIPScoreClient
143
+
144
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("clip_score")
145
+ return CLIPScoreClient(cache_config)
146
+
147
+ def get_toxicity_classifier_client(self) -> ToxicityClassifierClient:
148
+ """Get the toxicity classifier client. We currently only support Perspective API."""
149
+ from helm.clients.perspective_api_client import PerspectiveAPIClient
150
+
151
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("perspectiveapi")
152
+ return PerspectiveAPIClient(self.credentials.get("perspectiveApiKey", ""), cache_config)
153
+
154
+ def get_moderation_api_client(self) -> ModerationAPIClient:
155
+ """Get the ModerationAPI client."""
156
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("ModerationAPI")
157
+ return ModerationAPIClient(self.credentials.get("openaiApiKey", ""), cache_config)
158
+
159
+ def get_critique_client(self) -> CritiqueClient:
160
+ """Get the critique client."""
161
+ if self._critique_client:
162
+ return self._critique_client
163
+ critique_type = self.credentials.get("critiqueType")
164
+ if critique_type == "random":
165
+ from helm.proxy.critique.critique_client import RandomCritiqueClient
166
+
167
+ self._critique_client = RandomCritiqueClient()
168
+ elif critique_type == "mturk":
169
+ from helm.proxy.critique.mechanical_turk_critique_client import (
170
+ MechanicalTurkCritiqueClient,
171
+ )
172
+
173
+ self._critique_client = MechanicalTurkCritiqueClient()
174
+ elif critique_type == "surgeai":
175
+ from helm.proxy.critique.surge_ai_critique_client import (
176
+ SurgeAICritiqueClient,
177
+ )
178
+
179
+ surgeai_credentials = self.credentials.get("surgeaiApiKey")
180
+ if not surgeai_credentials:
181
+ raise ValueError("surgeaiApiKey credentials are required for SurgeAICritiqueClient")
182
+ self._critique_client = SurgeAICritiqueClient(
183
+ surgeai_credentials, self.cache_backend_config.get_cache_config("surgeai")
184
+ )
185
+ elif critique_type == "model":
186
+ from helm.proxy.critique.model_critique_client import ModelCritiqueClient
187
+
188
+ model_name: Optional[str] = self.credentials.get("critiqueModelName")
189
+ if model_name is None:
190
+ raise ValueError("critiqueModelName is required for ModelCritiqueClient")
191
+ client: Client = self._get_client(model_name)
192
+ self._critique_client = ModelCritiqueClient(client, model_name)
193
+ elif critique_type == "scale":
194
+ from helm.proxy.critique.scale_critique_client import ScaleCritiqueClient
195
+
196
+ scale_credentials = self.credentials.get("scaleApiKey")
197
+ scale_project = self.credentials.get("scaleProject", None)
198
+ if not scale_project:
199
+ raise ValueError("scaleProject is required for ScaleCritiqueClient.")
200
+ if not scale_credentials:
201
+ raise ValueError("scaleApiKey is required for ScaleCritiqueClient")
202
+ self._critique_client = ScaleCritiqueClient(
203
+ scale_credentials, self.cache_backend_config.get_cache_config("scale"), scale_project
204
+ )
205
+ else:
206
+ raise ValueError(
207
+ "CritiqueClient is not configured; set critiqueType to 'mturk',"
208
+ "'mturk-sandbox', 'surgeai', 'scale' or 'random'"
209
+ )
210
+ return self._critique_client
211
+
212
+ def _get_file_cache(self, host_organization: str) -> FileCache:
213
+ # Initialize `FileCache` for text-to-image model APIs
214
+ local_file_cache_path: str = os.path.join(self.file_storage_path, "output", host_organization)
215
+ return LocalFileCache(local_file_cache_path, file_extension="png")