crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (499):
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
  2. crfm_helm-0.5.1.dist-info/RECORD +654 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  11. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  12. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  13. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  14. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  15. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  16. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  17. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  18. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  19. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  20. helm/benchmark/adaptation/request_state.py +6 -1
  21. helm/benchmark/adaptation/scenario_state.py +6 -2
  22. helm/benchmark/annotation/annotator.py +43 -0
  23. helm/benchmark/annotation/annotator_factory.py +61 -0
  24. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  25. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  26. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  27. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  28. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  29. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  30. helm/benchmark/annotation_executor.py +124 -0
  31. helm/benchmark/augmentations/data_augmenter.py +0 -2
  32. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  33. helm/benchmark/augmentations/perturbation.py +25 -3
  34. helm/benchmark/augmentations/perturbation_description.py +1 -1
  35. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  36. helm/benchmark/augmentations/test_perturbation.py +41 -7
  37. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  38. helm/benchmark/config_registry.py +7 -1
  39. helm/benchmark/executor.py +46 -16
  40. helm/benchmark/huggingface_registration.py +20 -7
  41. helm/benchmark/metrics/basic_metrics.py +169 -664
  42. helm/benchmark/metrics/bbq_metrics.py +3 -4
  43. helm/benchmark/metrics/bias_metrics.py +6 -6
  44. helm/benchmark/metrics/classification_metrics.py +11 -8
  45. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  46. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  47. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  48. helm/benchmark/metrics/common_metric_specs.py +167 -0
  49. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  50. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  51. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  52. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  53. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  54. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  55. helm/benchmark/metrics/efficiency_metrics.py +213 -0
  56. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  57. helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
  58. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  59. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  60. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  61. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  62. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  63. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  64. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  65. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  66. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  68. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  69. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  70. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  71. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  72. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  73. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  74. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  75. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  76. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  77. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  78. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  79. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  80. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  81. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  82. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  83. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  84. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  85. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  86. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  87. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  88. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  89. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  90. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  91. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  92. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  93. helm/benchmark/metrics/metric.py +93 -172
  94. helm/benchmark/metrics/metric_name.py +0 -1
  95. helm/benchmark/metrics/metric_service.py +16 -0
  96. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  97. helm/benchmark/metrics/ranking_metrics.py +2 -2
  98. helm/benchmark/metrics/reference_metric.py +148 -0
  99. helm/benchmark/metrics/summac/model_summac.py +0 -2
  100. helm/benchmark/metrics/summarization_metrics.py +2 -2
  101. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  102. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  103. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  104. helm/benchmark/metrics/test_metric.py +2 -2
  105. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  106. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  107. helm/benchmark/metrics/toxicity_utils.py +23 -0
  108. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  109. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  110. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  111. helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
  112. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  113. helm/benchmark/model_deployment_registry.py +74 -0
  114. helm/benchmark/model_metadata_registry.py +41 -1
  115. helm/benchmark/multi_gpu_runner.py +133 -0
  116. helm/benchmark/presentation/create_plots.py +8 -7
  117. helm/benchmark/presentation/run_display.py +26 -10
  118. helm/benchmark/presentation/schema.py +15 -40
  119. helm/benchmark/presentation/summarize.py +119 -79
  120. helm/benchmark/presentation/table.py +8 -8
  121. helm/benchmark/presentation/test_contamination.py +2 -2
  122. helm/benchmark/presentation/test_run_entry.py +1 -2
  123. helm/benchmark/presentation/test_summarize.py +3 -3
  124. helm/benchmark/run.py +54 -26
  125. helm/benchmark/run_expander.py +205 -35
  126. helm/benchmark/run_spec.py +93 -0
  127. helm/benchmark/run_spec_factory.py +163 -0
  128. helm/benchmark/run_specs/__init__.py +0 -0
  129. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  130. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  131. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  132. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  133. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  134. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  135. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  136. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  137. helm/benchmark/run_specs/vlm_run_specs.py +757 -0
  138. helm/benchmark/runner.py +51 -57
  139. helm/benchmark/runner_config_registry.py +21 -0
  140. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  141. helm/benchmark/scenarios/bold_scenario.py +2 -2
  142. helm/benchmark/scenarios/code_scenario.py +1 -0
  143. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  144. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  145. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  146. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  147. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  148. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  149. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  150. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  151. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  152. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  153. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  154. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  155. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  156. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  157. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  158. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  159. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  160. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  162. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  163. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  164. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  165. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  166. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  167. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  168. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  169. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  170. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  171. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  172. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  173. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  174. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  175. helm/benchmark/scenarios/math_scenario.py +19 -2
  176. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  177. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  178. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  179. helm/benchmark/scenarios/scenario.py +4 -0
  180. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  181. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  182. helm/benchmark/scenarios/test_scenario.py +6 -3
  183. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  184. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  185. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  186. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  187. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  188. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  189. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  190. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  191. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  192. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  193. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
  194. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  198. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  199. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  200. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  201. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  202. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  203. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  204. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  205. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  206. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  207. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  208. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  209. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  210. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  211. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  212. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  213. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  214. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  215. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  216. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  217. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  218. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  219. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
  220. helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
  221. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  222. helm/benchmark/server.py +24 -1
  223. helm/benchmark/slurm_runner.py +70 -49
  224. helm/benchmark/static/benchmarking.js +1 -1
  225. helm/benchmark/static/schema_classic.yaml +258 -1066
  226. helm/benchmark/static/schema_image2structure.yaml +304 -0
  227. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  228. helm/benchmark/static/schema_lite.yaml +2 -227
  229. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  230. helm/benchmark/static/schema_unitxt.yaml +428 -0
  231. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  232. helm/benchmark/static/schema_vlm.yaml +823 -0
  233. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  234. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  235. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  236. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  237. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  238. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  239. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  240. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  241. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  242. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  243. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  244. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  245. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  246. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  247. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  248. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  249. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  250. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  251. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  252. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  253. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  254. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  255. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  256. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  257. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  258. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  259. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  260. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  261. helm/benchmark/static_build/config.js +4 -0
  262. helm/benchmark/static_build/index.html +20 -0
  263. helm/benchmark/test_data_preprocessor.py +3 -3
  264. helm/benchmark/test_run_expander.py +1 -1
  265. helm/benchmark/window_services/ai21_window_service.py +22 -33
  266. helm/benchmark/window_services/cohere_window_service.py +1 -63
  267. helm/benchmark/window_services/default_window_service.py +2 -44
  268. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  269. helm/benchmark/window_services/ice_window_service.py +0 -34
  270. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  271. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  272. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  273. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  274. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  275. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  276. helm/benchmark/window_services/local_window_service.py +21 -4
  277. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  278. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  279. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  280. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  281. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  282. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  283. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  284. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  285. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  286. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  287. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  288. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  289. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  290. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  291. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  292. helm/benchmark/window_services/test_utils.py +3 -2
  293. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  294. helm/benchmark/window_services/window_service.py +42 -0
  295. helm/benchmark/window_services/window_service_factory.py +4 -1
  296. helm/benchmark/window_services/yalm_window_service.py +0 -27
  297. helm/clients/__init__.py +0 -0
  298. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  299. helm/clients/aleph_alpha_client.py +112 -0
  300. helm/{proxy/clients → clients}/anthropic_client.py +233 -18
  301. helm/{proxy/clients → clients}/auto_client.py +59 -31
  302. helm/clients/bedrock_client.py +128 -0
  303. helm/clients/bedrock_utils.py +72 -0
  304. helm/{proxy/clients → clients}/client.py +65 -7
  305. helm/clients/clip_score_client.py +49 -0
  306. helm/clients/clip_scorers/__init__.py +0 -0
  307. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  308. helm/clients/clip_scorers/clip_scorer.py +50 -0
  309. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  310. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  311. helm/clients/gcs_client.py +82 -0
  312. helm/{proxy/clients → clients}/google_client.py +5 -5
  313. helm/clients/google_translate_client.py +35 -0
  314. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  315. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  316. helm/clients/image_generation/__init__.py +0 -0
  317. helm/clients/image_generation/adobe_vision_client.py +78 -0
  318. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  319. helm/clients/image_generation/cogview2/__init__.py +0 -0
  320. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  321. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  322. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  323. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  324. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  325. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  326. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  327. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  328. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  329. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  330. helm/clients/image_generation/cogview2_client.py +191 -0
  331. helm/clients/image_generation/dalle2_client.py +192 -0
  332. helm/clients/image_generation/dalle3_client.py +108 -0
  333. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  334. helm/clients/image_generation/dalle_mini/data.py +442 -0
  335. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  336. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  337. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  338. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  339. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  340. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  341. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  342. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  343. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  344. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  345. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  346. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  347. helm/clients/image_generation/dalle_mini_client.py +190 -0
  348. helm/clients/image_generation/deep_floyd_client.py +78 -0
  349. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  350. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  351. helm/clients/image_generation/lexica_client.py +86 -0
  352. helm/clients/image_generation/mindalle/__init__.py +0 -0
  353. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  354. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  355. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  356. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  357. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  358. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  359. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  360. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  361. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  362. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  363. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  364. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  365. helm/clients/image_generation/mindalle_client.py +115 -0
  366. helm/clients/image_generation/nudity_check_client.py +64 -0
  367. helm/clients/image_generation/together_image_generation_client.py +111 -0
  368. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  369. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  370. helm/clients/mistral_client.py +134 -0
  371. helm/clients/moderation_api_client.py +109 -0
  372. helm/clients/open_lm_client.py +43 -0
  373. helm/clients/openai_client.py +301 -0
  374. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  375. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  376. helm/clients/simple_client.py +64 -0
  377. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  378. helm/clients/test_client.py +100 -0
  379. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  380. helm/clients/test_simple_client.py +19 -0
  381. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  382. helm/{proxy/clients → clients}/together_client.py +104 -73
  383. helm/clients/vertexai_client.py +400 -0
  384. helm/clients/vision_language/__init__.py +0 -0
  385. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  386. helm/clients/vision_language/huggingface_vlm_client.py +111 -0
  387. helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
  388. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  389. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  390. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  391. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  392. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  393. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  394. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  395. helm/clients/vision_language/open_flamingo_client.py +155 -0
  396. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  397. helm/clients/vllm_client.py +46 -0
  398. helm/common/cache.py +16 -4
  399. helm/common/cache_backend_config.py +47 -0
  400. helm/common/clip_score_request.py +41 -0
  401. helm/common/file_caches/__init__.py +0 -0
  402. helm/common/file_caches/file_cache.py +16 -0
  403. helm/common/file_caches/local_file_cache.py +61 -0
  404. helm/common/file_caches/test_local_file_cache.py +25 -0
  405. helm/common/file_upload_request.py +27 -0
  406. helm/common/general.py +1 -1
  407. helm/common/image_generation_parameters.py +25 -0
  408. helm/common/images_utils.py +33 -3
  409. helm/common/key_value_store.py +35 -4
  410. helm/common/media_object.py +13 -0
  411. helm/common/moderations_api_request.py +71 -0
  412. helm/common/mongo_key_value_store.py +3 -3
  413. helm/common/multimodal_request_utils.py +31 -0
  414. helm/common/nudity_check_request.py +29 -0
  415. helm/common/request.py +15 -17
  416. helm/common/test_general.py +6 -0
  417. helm/common/tokenization_request.py +1 -1
  418. helm/config/model_deployments.yaml +1159 -538
  419. helm/config/model_metadata.yaml +868 -41
  420. helm/config/tokenizer_configs.yaml +149 -43
  421. helm/proxy/accounts.py +31 -4
  422. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  423. helm/proxy/critique/model_critique_client.py +8 -6
  424. helm/proxy/example_queries.py +29 -17
  425. helm/proxy/server.py +70 -5
  426. helm/proxy/services/remote_service.py +31 -0
  427. helm/proxy/services/server_service.py +96 -16
  428. helm/proxy/services/service.py +30 -0
  429. helm/proxy/services/test_remote_service.py +4 -3
  430. helm/proxy/services/test_service.py +0 -12
  431. helm/proxy/test_accounts.py +32 -0
  432. helm/proxy/token_counters/auto_token_counter.py +37 -37
  433. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  434. helm/proxy/token_counters/token_counter.py +3 -5
  435. helm/tokenizers/__init__.py +0 -0
  436. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  437. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  438. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  439. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  440. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  441. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  442. helm/tokenizers/simple_tokenizer.py +33 -0
  443. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  444. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  445. helm/tokenizers/test_simple_tokenizer.py +33 -0
  446. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  447. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  448. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  449. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  450. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  451. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  452. helm/benchmark/run_specs.py +0 -2762
  453. helm/benchmark/test_model_deployment_definition.py +0 -92
  454. helm/benchmark/test_model_properties.py +0 -1570
  455. helm/benchmark/vlm_run_specs.py +0 -97
  456. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  457. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  458. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  459. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  460. helm/benchmark/window_services/t511b_window_service.py +0 -30
  461. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  462. helm/benchmark/window_services/ul2_window_service.py +0 -30
  463. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  464. helm/common/cache_utils.py +0 -14
  465. helm/proxy/clients/aleph_alpha_client.py +0 -95
  466. helm/proxy/clients/goose_ai_client.py +0 -99
  467. helm/proxy/clients/microsoft_client.py +0 -180
  468. helm/proxy/clients/openai_client.py +0 -206
  469. helm/proxy/clients/simple_client.py +0 -60
  470. helm/proxy/clients/test_client.py +0 -49
  471. helm/proxy/clients/vertexai_client.py +0 -115
  472. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  473. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  474. helm/proxy/token_counters/free_token_counter.py +0 -12
  475. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  476. helm/proxy/token_counters/openai_token_counter.py +0 -22
  477. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  478. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  479. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  480. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  481. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  482. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
  483. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  484. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  485. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  486. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  487. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  488. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  489. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  490. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  491. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  492. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  493. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  494. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  495. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  496. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  497. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  498. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  499. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -1,18 +1,20 @@
1
- from typing import Any, Dict, List, Optional
1
+ from typing import Any, Dict, List, Optional, TypedDict, Union, cast
2
2
  import json
3
3
  import requests
4
+ import tempfile
4
5
  import time
5
6
  import urllib.parse
6
7
 
7
8
  from helm.common.cache import CacheConfig
8
9
  from helm.common.hierarchical_logger import htrack_block, hlog
10
+ from helm.common.media_object import IMAGE_TYPE, TEXT_TYPE
9
11
  from helm.common.optional_dependencies import handle_module_not_found_error
10
12
  from helm.common.request import (
11
13
  wrap_request_time,
12
14
  EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
13
15
  Request,
14
16
  RequestResult,
15
- Sequence,
17
+ GeneratedOutput,
16
18
  Token,
17
19
  ErrorFlags,
18
20
  )
@@ -20,16 +22,30 @@ from helm.common.tokenization_request import (
20
22
  TokenizationRequest,
21
23
  TokenizationRequestResult,
22
24
  )
23
- from helm.proxy.tokenizers.tokenizer import Tokenizer
24
- from .client import CachingClient, truncate_sequence
25
+ from helm.proxy.retry import NonRetriableException
26
+ from helm.tokenizers.tokenizer import Tokenizer
27
+ from helm.clients.client import CachingClient, truncate_sequence, truncate_and_tokenize_response_text
25
28
 
26
29
  try:
27
- import anthropic
30
+ from anthropic import Anthropic, BadRequestError
31
+ from anthropic.types import MessageParam
32
+ from anthropic.types.image_block_param import ImageBlockParam
33
+ from anthropic.types.text_block_param import TextBlockParam
28
34
  import websocket
29
35
  except ModuleNotFoundError as e:
30
36
  handle_module_not_found_error(e, ["anthropic"])
31
37
 
32
38
 
39
+ class AnthropicCompletionRequest(TypedDict):
40
+ prompt: str
41
+ stop_sequences: List[str]
42
+ model: str
43
+ max_tokens_to_sample: int
44
+ temperature: float
45
+ top_p: float
46
+ top_k: int
47
+
48
+
33
49
  class AnthropicClient(CachingClient):
34
50
  """
35
51
  Client for the Anthropic models (https://arxiv.org/abs/2204.05862).
@@ -53,6 +69,9 @@ class AnthropicClient(CachingClient):
53
69
  MAX_COMPLETION_LENGTH: int = (
54
70
  8192 # See https://docs.google.com/document/d/1vX6xgoA-KEKxqtMlBVAqYvE8KUfZ7ABCjTxAjf1T5kI/edit#
55
71
  )
72
+ # An Anthropic error message: "At least one of the image dimensions exceed max allowed size: 8000 pixels"
73
+ MAX_IMAGE_DIMENSION: int = 8000
74
+
56
75
  ADDITIONAL_TOKENS: int = 5
57
76
  PROMPT_ANSWER_START: str = "The answer is "
58
77
 
@@ -63,12 +82,12 @@ class AnthropicClient(CachingClient):
63
82
  self.tokenizer = tokenizer
64
83
  self.tokenizer_name = tokenizer_name
65
84
  self.api_key: Optional[str] = api_key
66
- self._client = anthropic.Client(api_key) if api_key else None
85
+ self.client = Anthropic(api_key=api_key)
67
86
 
68
- def _send_request(self, raw_request: Dict[str, Any]) -> Dict[str, Any]:
87
+ def _send_request(self, raw_request: AnthropicCompletionRequest) -> Dict[str, Any]:
69
88
  if self.api_key is None:
70
89
  raise Exception("API key is not set. Please set it in the HELM config file.")
71
- result = self._client.completion(**raw_request)
90
+ result = self.client.completions.create(**raw_request).model_dump()
72
91
  assert "error" not in result, f"Request failed with error: {result['error']}"
73
92
  return result
74
93
 
@@ -103,7 +122,7 @@ class AnthropicClient(CachingClient):
103
122
  if request.max_tokens == 0 and not request.echo_prompt:
104
123
  raise ValueError("echo_prompt must be True when max_tokens=0.")
105
124
 
106
- raw_request = {
125
+ raw_request: AnthropicCompletionRequest = {
107
126
  "prompt": request.prompt,
108
127
  "stop_sequences": request.stop_sequences,
109
128
  "model": request.model_engine,
@@ -113,7 +132,7 @@ class AnthropicClient(CachingClient):
113
132
  "top_k": request.top_k_per_token,
114
133
  }
115
134
 
116
- completions: List[Sequence] = []
135
+ completions: List[GeneratedOutput] = []
117
136
 
118
137
  # `num_completions` is not supported, so instead make `num_completions` separate requests.
119
138
  for completion_index in range(request.num_completions):
@@ -172,11 +191,9 @@ class AnthropicClient(CachingClient):
172
191
  )
173
192
 
174
193
  # Log probs are currently not supported by Anthropic, so set to 0 for now.
175
- tokens: List[Token] = [
176
- Token(text=str(text), logprob=0, top_logprobs={}) for text in tokenization_result.raw_tokens
177
- ]
194
+ tokens: List[Token] = [Token(text=str(text), logprob=0) for text in tokenization_result.raw_tokens]
178
195
 
179
- completion = Sequence(text=response["completion"], logprob=0, tokens=tokens)
196
+ completion = GeneratedOutput(text=response["completion"], logprob=0, tokens=tokens)
180
197
  # See NOTE() in _filter_completion() to understand why warnings are printed for truncation.
181
198
  # TODO(#1512): Fix this with post-processing.
182
199
  sequence = truncate_sequence(completion, request, print_warning=True)
@@ -192,6 +209,205 @@ class AnthropicClient(CachingClient):
192
209
  )
193
210
 
194
211
 
212
+ def _is_content_moderation_failure(response: Dict) -> bool:
213
+ """Return whether a response failed because of the content moderation filter."""
214
+ if (
215
+ "error" in response
216
+ and "message" in response["error"]
217
+ and response["error"]["message"] == "Output blocked by content filtering policy"
218
+ ):
219
+ hlog(f"Anthropic - output blocked by content filtering policy: {response}")
220
+ return True
221
+ return False
222
+
223
+
224
+ class AnthropicMessagesRequest(TypedDict, total=False):
225
+ messages: List[MessageParam]
226
+ model: str
227
+ stop_sequences: List[str]
228
+ system: str
229
+ max_tokens: int
230
+ temperature: float
231
+ top_k: int
232
+ top_p: float
233
+
234
+
235
+ class AnthropicMessagesRequestError(NonRetriableException):
236
+ pass
237
+
238
+
239
+ class AnthropicMessagesResponseError(Exception):
240
+ pass
241
+
242
+
243
+ class AnthropicMessagesClient(CachingClient):
244
+ # Source: https://docs.anthropic.com/claude/docs/models-overview
245
+ MAX_OUTPUT_TOKENS: int = 4096
246
+
247
+ def __init__(
248
+ self, tokenizer: Tokenizer, tokenizer_name: str, cache_config: CacheConfig, api_key: Optional[str] = None
249
+ ):
250
+ super().__init__(cache_config=cache_config)
251
+ self.tokenizer = tokenizer
252
+ self.tokenizer_name = tokenizer_name
253
+ self.client = Anthropic(api_key=api_key)
254
+ self.api_key: Optional[str] = api_key
255
+
256
+ def make_request(self, request: Request) -> RequestResult:
257
+ if request.max_tokens > AnthropicMessagesClient.MAX_OUTPUT_TOKENS:
258
+ raise AnthropicMessagesRequestError(
259
+ f"Request.max_tokens must be <= {AnthropicMessagesClient.MAX_OUTPUT_TOKENS}"
260
+ )
261
+
262
+ messages: List[MessageParam] = []
263
+ system_message: Optional[MessageParam] = None
264
+
265
+ if request.messages is not None:
266
+ # TODO(#2439): Refactor out Request validation
267
+ if request.multimodal_prompt is not None or request.prompt:
268
+ raise AnthropicMessagesRequestError(
269
  + "Exactly one of Request.messages, Request.prompt or Request.multimodal_prompt should be set"
270
+ )
271
+ messages = cast(List[MessageParam], request.messages)
272
+ if messages[0]["role"] == "system":
273
+ system_message = messages[0]
274
+ messages = messages[1:]
275
+
276
+ elif request.multimodal_prompt is not None:
277
+ # TODO(#2439): Refactor out Request validation
278
+ if request.messages is not None or request.prompt:
279
+ raise AnthropicMessagesRequestError(
280
+ "Exactly one of Request.messages, Request.prompt or Request.multimodal_prompt should be set"
281
+ )
282
+ blocks: List[Union[TextBlockParam, ImageBlockParam]] = []
283
+ for media_object in request.multimodal_prompt.media_objects:
284
+ if media_object.is_type(IMAGE_TYPE):
285
+ # TODO(#2439): Refactor out Request validation
286
+ if not media_object.location:
287
+ raise Exception("MediaObject of image type has missing location field value")
288
+
289
+ from helm.common.images_utils import encode_base64, get_dimensions, copy_image
290
+
291
+ image_location: str = media_object.location
292
+ base64_image: str
293
+
294
+ image_width, image_height = get_dimensions(media_object.location)
295
+ if (
296
+ image_width > AnthropicClient.MAX_IMAGE_DIMENSION
297
+ or image_height > AnthropicClient.MAX_IMAGE_DIMENSION
298
+ ):
299
+ hlog(
300
+ f"WARNING: Image {image_location} exceeds max allowed size: "
301
+ f"{AnthropicClient.MAX_IMAGE_DIMENSION} pixels"
302
+ )
303
+ # Save the resized image to a temporary file
304
+ with tempfile.NamedTemporaryFile(suffix=".jpg") as temp_file:
305
+ hlog(f"Resizing image to temporary path: {temp_file.name}")
306
+ copy_image(
307
+ src=image_location,
308
+ dest=temp_file.name,
309
+ width=min(image_width, AnthropicClient.MAX_IMAGE_DIMENSION),
310
+ height=min(image_height, AnthropicClient.MAX_IMAGE_DIMENSION),
311
+ )
312
+ base64_image = encode_base64(temp_file.name, format="JPEG")
313
+ else:
314
+ base64_image = encode_base64(image_location, format="JPEG")
315
+
316
+ image_block: ImageBlockParam = {
317
+ "type": "image",
318
+ "source": {
319
+ "type": "base64",
320
+ "media_type": "image/jpeg",
321
+ "data": base64_image,
322
+ },
323
+ }
324
+ blocks.append(image_block)
325
+ if media_object.is_type(TEXT_TYPE):
326
+ # TODO(#2439): Refactor out Request validation
327
+ if media_object.text is None:
328
+ raise ValueError("MediaObject of text type has missing text field value")
329
+ text_block: TextBlockParam = {
330
+ "type": "text",
331
+ "text": media_object.text,
332
+ }
333
+ # Anthropic does not support empty text blocks
334
+ if media_object.text.strip():
335
+ blocks.append(text_block)
336
+ messages = [{"role": "user", "content": blocks}]
337
+
338
+ else:
339
+ messages = [{"role": "user", "content": request.prompt}]
340
+
341
+ raw_request: AnthropicMessagesRequest = {
342
+ "messages": messages,
343
+ "model": request.model_engine,
344
+ "stop_sequences": request.stop_sequences,
345
+ "max_tokens": request.max_tokens,
346
+ "temperature": request.temperature,
347
+ "top_p": request.top_p,
348
+ "top_k": request.top_k_per_token,
349
+ }
350
+ if system_message is not None:
351
+ raw_request["system"] = cast(str, system_message["content"])
352
+ completions: List[GeneratedOutput] = []
353
+
354
+ # `num_completions` is not supported, so instead make `num_completions` separate requests.
355
+ for completion_index in range(request.num_completions):
356
+
357
+ def do_it() -> Dict[str, Any]:
358
+ try:
359
+ result = self.client.messages.create(**raw_request).model_dump()
360
+ if "content" not in result or not result["content"]:
361
+ raise AnthropicMessagesResponseError(f"Anthropic response has empty content: {result}")
362
+ elif "text" not in result["content"][0]:
363
+ raise AnthropicMessagesResponseError(f"Anthropic response has non-text content: {result}")
364
+ return result
365
+ except BadRequestError as e:
366
+ response = e.response.json()
367
+ if _is_content_moderation_failure(response):
368
+ return response
369
+ raise
370
+
371
+ cache_key = CachingClient.make_cache_key(
372
+ {
373
+ "completion_index": completion_index,
374
+ **raw_request,
375
+ },
376
+ request,
377
+ )
378
+ response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
379
+
380
+ if _is_content_moderation_failure(response):
381
+ hlog(
382
+ f"WARNING: Returning empty request for {request.model_deployment} "
383
+ "due to content moderation filter"
384
+ )
385
+ return RequestResult(
386
+ success=False,
387
+ cached=cached,
388
+ error=response["error"]["message"],
389
+ completions=[],
390
+ embedding=[],
391
+ error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
392
+ request_time=response["request_time"],
393
+ request_datetime=response["request_datetime"],
394
+ )
395
+
396
+ completion = truncate_and_tokenize_response_text(
397
+ response["content"][0]["text"], request, self.tokenizer, self.tokenizer_name, original_finish_reason=""
398
+ )
399
+ completions.append(completion)
400
+
401
+ return RequestResult(
402
+ success=True,
403
+ cached=cached,
404
+ request_time=response["request_time"],
405
+ request_datetime=response["request_datetime"],
406
+ completions=completions,
407
+ embedding=[],
408
+ )
409
+
410
+
195
411
  class AnthropicRequestError(Exception):
196
412
  pass
197
413
 
@@ -394,7 +610,7 @@ class AnthropicLegacyClient(CachingClient):
394
610
 
395
611
  # Since Anthropic doesn't support multiple completions, we have to manually call it multiple times,
396
612
  # and aggregate the results into `completions` and `request_time`.
397
- completions: List[Sequence] = []
613
+ completions: List[GeneratedOutput] = []
398
614
  all_cached = True
399
615
  request_time = 0
400
616
  request_datetime: Optional[int] = None
@@ -427,8 +643,7 @@ class AnthropicLegacyClient(CachingClient):
427
643
  for text, token_logprob, all_logprobs, all_tokens in zip(
428
644
  log_probs["tokens"], log_probs["logprobs"], log_probs["topk_logprobs"], log_probs["topk_tokens"]
429
645
  ):
430
- top_logprobs: Dict[str, float] = {text: logprob for text, logprob in zip(all_tokens, all_logprobs)}
431
- tokens.append(Token(text=text, logprob=token_logprob, top_logprobs=top_logprobs))
646
+ tokens.append(Token(text=text, logprob=token_logprob))
432
647
  sequence_logprob += token_logprob
433
648
 
434
649
  finish_reason: str = response["stop_reason"]
@@ -436,7 +651,7 @@ class AnthropicLegacyClient(CachingClient):
436
651
  if finish_reason == AnthropicLegacyClient.STOP_SEQUENCE_STOP_REASON:
437
652
  finish_reason = "stop"
438
653
 
439
- completion = Sequence(
654
+ completion = GeneratedOutput(
440
655
  text=response["text"],
441
656
  logprob=sequence_logprob,
442
657
  tokens=tokens,
@@ -1,22 +1,23 @@
1
- import os
2
1
  from dataclasses import replace
2
+ import os
3
3
  from typing import Any, Dict, Mapping, Optional
4
4
 
5
5
  from retrying import Attempt, RetryError
6
6
 
7
7
  from helm.benchmark.model_deployment_registry import ModelDeployment, get_model_deployment
8
- from helm.common.cache_utils import build_cache_config
8
+ from helm.common.file_caches.file_cache import FileCache
9
+ from helm.common.file_caches.local_file_cache import LocalFileCache
9
10
  from helm.common.credentials_utils import provide_api_key
10
- from helm.common.cache import CacheConfig
11
+ from helm.common.cache_backend_config import CacheBackendConfig, CacheConfig
11
12
  from helm.common.hierarchical_logger import hlog
12
13
  from helm.common.object_spec import create_object, inject_object_spec_args
13
14
  from helm.common.request import Request, RequestResult
14
- from helm.proxy.clients.client import Client
15
+ from helm.clients.client import Client
16
+ from helm.clients.moderation_api_client import ModerationAPIClient
15
17
  from helm.proxy.critique.critique_client import CritiqueClient
16
- from helm.proxy.clients.huggingface_client import HuggingFaceClient
17
- from helm.proxy.clients.toxicity_classifier_client import ToxicityClassifierClient
18
+ from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
18
19
  from helm.proxy.retry import NonRetriableException, retry_request
19
- from helm.proxy.tokenizers.auto_tokenizer import AutoTokenizer
20
+ from helm.tokenizers.auto_tokenizer import AutoTokenizer
20
21
 
21
22
 
22
23
  class AuthenticationError(NonRetriableException):
@@ -26,18 +27,17 @@ class AuthenticationError(NonRetriableException):
26
27
  class AutoClient(Client):
27
28
  """Automatically dispatch to the proper `Client` based on the model deployment name."""
28
29
 
29
- def __init__(self, credentials: Mapping[str, Any], cache_path: str, mongo_uri: str = ""):
30
- self._auto_tokenizer = AutoTokenizer(credentials, cache_path, mongo_uri)
30
+ def __init__(
31
+ self, credentials: Mapping[str, Any], file_storage_path: str, cache_backend_config: CacheBackendConfig
32
+ ):
33
+ self._auto_tokenizer = AutoTokenizer(credentials, cache_backend_config)
31
34
  self.credentials = credentials
32
- self.cache_path = cache_path
33
- self.mongo_uri = mongo_uri
35
+ self.file_storage_path = file_storage_path
36
+ self.cache_backend_config = cache_backend_config
34
37
  self.clients: Dict[str, Client] = {}
35
- # self._huggingface_client is lazily instantiated by get_huggingface_client()
36
- self._huggingface_client: Optional[HuggingFaceClient] = None
37
- # self._critique_client is lazily instantiated by get_critique_client()
38
38
  self._critique_client: Optional[CritiqueClient] = None
39
- hlog(f"AutoClient: cache_path = {cache_path}")
40
- hlog(f"AutoClient: mongo_uri = {mongo_uri}")
39
+ hlog(f"AutoClient: file_storage_path = {file_storage_path}")
40
+ hlog(f"AutoClient: cache_backend_config = {cache_backend_config}")
41
41
 
42
42
  def _get_client(self, model_deployment_name: str) -> Client:
43
43
  """Return a client based on the model, creating it if necessary."""
@@ -64,11 +64,14 @@ class AutoClient(Client):
64
64
 
65
65
  # Prepare a cache
66
66
  host_organization: str = model_deployment.host_organization
67
- cache_config: CacheConfig = build_cache_config(self.cache_path, self.mongo_uri, host_organization)
67
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config(host_organization)
68
68
 
69
69
  client_spec = inject_object_spec_args(
70
70
  model_deployment.client_spec,
71
- constant_bindings={"cache_config": cache_config, "tokenizer_name": model_deployment.tokenizer_name},
71
+ constant_bindings={
72
+ "cache_config": cache_config,
73
+ "tokenizer_name": model_deployment.tokenizer_name,
74
+ },
72
75
  provider_bindings={
73
76
  "api_key": lambda: provide_api_key(self.credentials, host_organization, model_deployment_name),
74
77
  "tokenizer": lambda: self._auto_tokenizer._get_tokenizer(
@@ -77,9 +80,14 @@ class AutoClient(Client):
77
80
  "org_id": lambda: self.credentials.get(
78
81
  host_organization + "OrgId", None
79
82
  ), # OpenAI, GooseAI, Microsoft
80
- "lock_file_path": lambda: os.path.join(self.cache_path, f"{host_organization}.lock"), # Microsoft
83
+ "moderation_api_client": lambda: self.get_moderation_api_client(), # OpenAI DALL-E
84
+ "lock_file_path": lambda: os.path.join(
85
+ self.file_storage_path, f"{host_organization}.lock"
86
+ ), # Microsoft
81
87
  "project_id": lambda: self.credentials.get(host_organization + "ProjectId", None), # VertexAI
82
88
  "location": lambda: self.credentials.get(host_organization + "Location", None), # VertexAI
89
+ "hf_auth_token": lambda: self.credentials.get("huggingfaceAuthToken", None), # HuggingFace
90
+ "file_cache": lambda: self._get_file_cache(host_organization), # Text-to-image models
83
91
  },
84
92
  )
85
93
  client = create_object(client_spec)
@@ -117,13 +125,37 @@ class AutoClient(Client):
117
125
  # Notify our user that we failed to make the request even after retrying.
118
126
  return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")
119
127
 
128
+ def get_gcs_client(self):
129
+ from .gcs_client import GCSClient
130
+
131
+ bucket_name: str = self.credentials["gcsBucketName"]
132
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("gcs")
133
+ return GCSClient(bucket_name, cache_config)
134
+
135
+ def get_nudity_check_client(self):
136
+ from helm.clients.image_generation.nudity_check_client import NudityCheckClient
137
+
138
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("nudity")
139
+ return NudityCheckClient(cache_config)
140
+
141
+ def get_clip_score_client(self):
142
+ from .clip_score_client import CLIPScoreClient
143
+
144
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("clip_score")
145
+ return CLIPScoreClient(cache_config)
146
+
120
147
  def get_toxicity_classifier_client(self) -> ToxicityClassifierClient:
121
148
  """Get the toxicity classifier client. We currently only support Perspective API."""
122
- from helm.proxy.clients.perspective_api_client import PerspectiveAPIClient
149
+ from helm.clients.perspective_api_client import PerspectiveAPIClient
123
150
 
124
- cache_config: CacheConfig = build_cache_config(self.cache_path, self.mongo_uri, "perspectiveapi")
151
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("perspectiveapi")
125
152
  return PerspectiveAPIClient(self.credentials.get("perspectiveApiKey", ""), cache_config)
126
153
 
154
+ def get_moderation_api_client(self) -> ModerationAPIClient:
155
+ """Get the ModerationAPI client."""
156
+ cache_config: CacheConfig = self.cache_backend_config.get_cache_config("ModerationAPI")
157
+ return ModerationAPIClient(self.credentials.get("openaiApiKey", ""), cache_config)
158
+
127
159
  def get_critique_client(self) -> CritiqueClient:
128
160
  """Get the critique client."""
129
161
  if self._critique_client:
@@ -148,7 +180,7 @@ class AutoClient(Client):
148
180
  if not surgeai_credentials:
149
181
  raise ValueError("surgeaiApiKey credentials are required for SurgeAICritiqueClient")
150
182
  self._critique_client = SurgeAICritiqueClient(
151
- surgeai_credentials, build_cache_config(self.cache_path, self.mongo_uri, "surgeai")
183
+ surgeai_credentials, self.cache_backend_config.get_cache_config("surgeai")
152
184
  )
153
185
  elif critique_type == "model":
154
186
  from helm.proxy.critique.model_critique_client import ModelCritiqueClient
@@ -168,7 +200,7 @@ class AutoClient(Client):
168
200
  if not scale_credentials:
169
201
  raise ValueError("scaleApiKey is required for ScaleCritiqueClient")
170
202
  self._critique_client = ScaleCritiqueClient(
171
- scale_credentials, build_cache_config(self.cache_path, self.mongo_uri, "scale"), scale_project
203
+ scale_credentials, self.cache_backend_config.get_cache_config("scale"), scale_project
172
204
  )
173
205
  else:
174
206
  raise ValueError(
@@ -177,11 +209,7 @@ class AutoClient(Client):
177
209
  )
178
210
  return self._critique_client
179
211
 
180
- def get_huggingface_client(self) -> HuggingFaceClient:
181
- """Get the Hugging Face client."""
182
- if self._huggingface_client:
183
- assert isinstance(self._huggingface_client, HuggingFaceClient)
184
- return self._huggingface_client
185
- cache_config = build_cache_config(self.cache_path, self.mongo_uri, "huggingface")
186
- self._huggingface_client = HuggingFaceClient(cache_config=cache_config)
187
- return self._huggingface_client
212
+ def _get_file_cache(self, host_organization: str) -> FileCache:
213
+ # Initialize `FileCache` for text-to-image model APIs
214
+ local_file_cache_path: str = os.path.join(self.file_storage_path, "output", host_organization)
215
+ return LocalFileCache(local_file_cache_path, file_extension="png")
@@ -0,0 +1,128 @@
1
+ from abc import abstractmethod
2
+ from copy import deepcopy
3
+ import json
4
+ import os
5
+ from typing import Any, Dict, List, Mapping, Optional
6
+
7
+ from helm.common.cache import CacheConfig
8
+ from helm.clients.client import CachingClient, truncate_and_tokenize_response_text
9
+ from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
10
+ from helm.clients.bedrock_utils import get_bedrock_client
11
+ from helm.tokenizers.tokenizer import Tokenizer
12
+
13
+
14
+ JSON_CONTENT_TYPE = "application/json"
15
+
16
+
17
+ class BedrockClient(CachingClient):
18
+ @abstractmethod
19
+ def convert_request_to_raw_request(self, request: Request) -> Dict:
20
+ raise NotImplementedError()
21
+
22
+ @abstractmethod
23
+ def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[GeneratedOutput]:
24
+ raise NotImplementedError()
25
+
26
+ def __init__(
27
+ self,
28
+ cache_config: CacheConfig,
29
+ tokenizer: Tokenizer,
30
+ tokenizer_name: str,
31
+ bedrock_model_id: Optional[str] = None,
32
+ assumed_role: Optional[str] = None,
33
+ region: Optional[str] = None,
34
+ ):
35
+ super().__init__(cache_config=cache_config)
36
+ self.tokenizer = tokenizer
37
+ self.tokenizer_name = tokenizer_name
38
+ self.bedrock_model_id = bedrock_model_id
39
+ self.bedrock_client = get_bedrock_client(
40
+ assumed_role=assumed_role or os.environ.get("BEDROCK_ASSUME_ROLE", None),
41
+ region=region or os.environ.get("AWS_DEFAULT_REGION", None),
42
+ )
43
+
44
+ def make_request(self, request: Request) -> RequestResult:
45
+ # model_id should be something like "amazon.titan-tg1-large"
46
+ model_id = self.bedrock_model_id if self.bedrock_model_id else request.model.replace("/", ".")
47
+ raw_request = self.convert_request_to_raw_request(request)
48
+
49
  + # modelId isn't part of raw_request, so it must be explicitly included in the cache key input.
50
+ raw_request_for_cache: Dict = {"modelId": model_id, **deepcopy(raw_request)}
51
+ cache_key: Mapping = CachingClient.make_cache_key(raw_request_for_cache, request)
52
+
53
+ def do_it() -> Dict[Any, Any]:
54
+ response = self.bedrock_client.invoke_model(
55
+ body=json.dumps(raw_request), modelId=model_id, accept=JSON_CONTENT_TYPE, contentType=JSON_CONTENT_TYPE
56
+ )
57
+ return json.loads(response.get("body").read())
58
+
59
+ try:
60
+ response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
61
+ except Exception as error:
62
+ return RequestResult(
63
+ success=False,
64
+ cached=False,
65
+ error=str(error),
66
+ completions=[],
67
+ embedding=[],
68
+ )
69
+
70
+ completions = self.convert_raw_response_to_completions(response, request)
71
+
72
+ return RequestResult(
73
+ success=True,
74
+ cached=cached,
75
+ request_time=response["request_time"],
76
+ request_datetime=response["request_datetime"],
77
+ completions=completions,
78
+ embedding=[],
79
+ )
80
+
81
+
82
+ class BedrockTitanClient(BedrockClient):
83
+ _COMPLETION_REASON_TO_FINISH_REASON = {
84
+ "LENGTH": "length",
85
+ "FINISH": "endoftext",
86
+ }
87
+
88
+ def convert_request_to_raw_request(self, request: Request) -> Dict:
89
+ # TODO: Support the following:
90
+ # - top_k_per_token
91
+ # - echo_prompt
92
+ # - num_completions
93
+ return {
94
+ "inputText": request.prompt,
95
+ "textGenerationConfig": {
96
+ "maxTokenCount": request.max_tokens,
97
+ # We ignore stop sequences in the request and always set stop sequences to the empty list.
98
+ # This is because:
99
+ #
100
+ # 1. The only permitted stop sequences are "|" and "User:"
101
+ # - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-text.html
102
+ # - https://github.com/boto/boto3/issues/3993
103
+ # - https://github.com/aws/aws-sdk/issues/692
104
+ #
105
+ # 2. Titan has the tendency to emit "\n" as the first token in the generated text output,
106
+ # which would cause the output to stop immediately if "\n" is in the stop_sequences.
107
+ "stopSequences": [],
108
+ "temperature": request.temperature,
109
+ "topP": request.top_p,
110
+ },
111
+ }
112
+
113
+ def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[GeneratedOutput]:
114
+ # TODO: Support the following:
115
+ # - tokens
116
+ # - logprob
117
+ completions: List[GeneratedOutput] = []
118
+ for raw_completion in response["results"]:
119
+ output_text = raw_completion["outputText"]
120
  + # Call lstrip() because Titan has the tendency to emit "\n" as the first token in the generated text output.
121
+ finish_reason = BedrockTitanClient._COMPLETION_REASON_TO_FINISH_REASON.get(
122
+ raw_completion["completionReason"], raw_completion["completionReason"].lower()
123
+ )
124
+ completion = truncate_and_tokenize_response_text(
125
+ output_text.lstrip(), request, self.tokenizer, self.tokenizer_name, finish_reason
126
+ )
127
+ completions.append(completion)
128
+ return completions