crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
- crfm_helm-0.5.0.dist-info/RECORD +642 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +37 -2
- helm/benchmark/adaptation/adapters/adapter.py +4 -42
- helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
- helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
- helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
- helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
- helm/benchmark/adaptation/common_adapter_specs.py +376 -0
- helm/benchmark/adaptation/prompt.py +7 -1
- helm/benchmark/adaptation/request_state.py +6 -1
- helm/benchmark/adaptation/scenario_state.py +6 -2
- helm/benchmark/annotation/annotator.py +43 -0
- helm/benchmark/annotation/annotator_factory.py +61 -0
- helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
- helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
- helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
- helm/benchmark/annotation/test_annotator_factory.py +26 -0
- helm/benchmark/annotation/test_dummy_annotator.py +44 -0
- helm/benchmark/annotation_executor.py +124 -0
- helm/benchmark/augmentations/cleva_perturbation.py +7 -14
- helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
- helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
- helm/benchmark/augmentations/data_augmenter.py +0 -2
- helm/benchmark/augmentations/dialect_perturbation.py +2 -2
- helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
- helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
- helm/benchmark/augmentations/gender_perturbation.py +3 -3
- helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
- helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
- helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
- helm/benchmark/augmentations/person_name_perturbation.py +0 -7
- helm/benchmark/augmentations/perturbation.py +20 -7
- helm/benchmark/augmentations/perturbation_description.py +1 -1
- helm/benchmark/augmentations/space_perturbation.py +2 -2
- helm/benchmark/augmentations/suffix_perturbation.py +29 -0
- helm/benchmark/augmentations/synonym_perturbation.py +2 -2
- helm/benchmark/augmentations/test_perturbation.py +11 -7
- helm/benchmark/augmentations/translate_perturbation.py +30 -0
- helm/benchmark/augmentations/typos_perturbation.py +2 -2
- helm/benchmark/config_registry.py +38 -0
- helm/benchmark/executor.py +46 -16
- helm/benchmark/huggingface_registration.py +37 -7
- helm/benchmark/metrics/basic_metrics.py +172 -641
- helm/benchmark/metrics/bbq_metrics.py +3 -4
- helm/benchmark/metrics/bias_metrics.py +6 -6
- helm/benchmark/metrics/classification_metrics.py +11 -8
- helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
- helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
- helm/benchmark/metrics/code_metrics.py +4 -3
- helm/benchmark/metrics/code_metrics_helper.py +0 -2
- helm/benchmark/metrics/common_metric_specs.py +167 -0
- helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
- helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
- helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
- helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
- helm/benchmark/metrics/disinformation_metrics.py +6 -112
- helm/benchmark/metrics/dry_run_metrics.py +5 -3
- helm/benchmark/metrics/efficiency_metrics.py +206 -0
- helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
- helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
- helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
- helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
- helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
- helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
- helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
- helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
- helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
- helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
- helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
- helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
- helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
- helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
- helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
- helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
- helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
- helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
- helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
- helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
- helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
- helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
- helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
- helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
- helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
- helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
- helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
- helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
- helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
- helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
- helm/benchmark/metrics/language_modeling_metrics.py +99 -0
- helm/benchmark/metrics/machine_translation_metrics.py +5 -5
- helm/benchmark/metrics/metric.py +93 -172
- helm/benchmark/metrics/metric_name.py +0 -1
- helm/benchmark/metrics/metric_service.py +16 -0
- helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
- helm/benchmark/metrics/ranking_metrics.py +6 -7
- helm/benchmark/metrics/reference_metric.py +148 -0
- helm/benchmark/metrics/summac/model_summac.py +0 -2
- helm/benchmark/metrics/summarization_metrics.py +8 -8
- helm/benchmark/metrics/test_classification_metrics.py +9 -6
- helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
- helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
- helm/benchmark/metrics/test_metric.py +2 -2
- helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
- helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
- helm/benchmark/metrics/toxicity_metrics.py +1 -1
- helm/benchmark/metrics/toxicity_utils.py +23 -0
- helm/benchmark/metrics/unitxt_metrics.py +81 -0
- helm/benchmark/metrics/vision_language/__init__.py +0 -0
- helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
- helm/benchmark/metrics/vision_language/image_utils.py +100 -0
- helm/benchmark/model_deployment_registry.py +164 -41
- helm/benchmark/model_metadata_registry.py +181 -35
- helm/benchmark/multi_gpu_runner.py +133 -0
- helm/benchmark/presentation/contamination.py +3 -3
- helm/benchmark/presentation/create_plots.py +8 -7
- helm/benchmark/presentation/run_display.py +50 -17
- helm/benchmark/presentation/schema.py +28 -46
- helm/benchmark/presentation/summarize.py +213 -96
- helm/benchmark/presentation/table.py +8 -8
- helm/benchmark/presentation/test_contamination.py +2 -2
- helm/benchmark/presentation/test_run_entry.py +14 -9
- helm/benchmark/presentation/test_summarize.py +5 -0
- helm/benchmark/run.py +66 -54
- helm/benchmark/run_expander.py +342 -31
- helm/benchmark/run_spec.py +93 -0
- helm/benchmark/run_spec_factory.py +162 -0
- helm/benchmark/run_specs/__init__.py +0 -0
- helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
- helm/benchmark/run_specs/cleva_run_specs.py +277 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
- helm/benchmark/run_specs/heim_run_specs.py +623 -0
- helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
- helm/benchmark/run_specs/lite_run_specs.py +307 -0
- helm/benchmark/run_specs/simple_run_specs.py +104 -0
- helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
- helm/benchmark/run_specs/vlm_run_specs.py +501 -0
- helm/benchmark/runner.py +116 -69
- helm/benchmark/runner_config_registry.py +21 -0
- helm/benchmark/scenarios/bbq_scenario.py +1 -1
- helm/benchmark/scenarios/bold_scenario.py +2 -2
- helm/benchmark/scenarios/cleva_scenario.py +43 -46
- helm/benchmark/scenarios/code_scenario.py +3 -2
- helm/benchmark/scenarios/commonsense_scenario.py +171 -191
- helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
- helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
- helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
- helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
- helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
- helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
- helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
- helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
- helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
- helm/benchmark/scenarios/image_generation/__init__.py +0 -0
- helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
- helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
- helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
- helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
- helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
- helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
- helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
- helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
- helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
- helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
- helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
- helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
- helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
- helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
- helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
- helm/benchmark/scenarios/imdb_scenario.py +0 -1
- helm/benchmark/scenarios/legalbench_scenario.py +123 -0
- helm/benchmark/scenarios/live_qa_scenario.py +94 -0
- helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
- helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
- helm/benchmark/scenarios/math_scenario.py +19 -2
- helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
- helm/benchmark/scenarios/numeracy_scenario.py +3 -3
- helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
- helm/benchmark/scenarios/raft_scenario.py +2 -6
- helm/benchmark/scenarios/scenario.py +14 -2
- helm/benchmark/scenarios/simple_scenarios.py +122 -1
- helm/benchmark/scenarios/test_math_scenario.py +22 -0
- helm/benchmark/scenarios/test_scenario.py +6 -3
- helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
- helm/benchmark/scenarios/the_pile_scenario.py +6 -7
- helm/benchmark/scenarios/unitxt_scenario.py +56 -0
- helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
- helm/benchmark/scenarios/vicuna_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
- helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
- helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
- helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
- helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
- helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
- helm/benchmark/server.py +59 -2
- helm/benchmark/slurm_jobs.py +12 -0
- helm/benchmark/slurm_runner.py +79 -51
- helm/benchmark/static/benchmarking.js +3 -4
- helm/benchmark/static/contamination.yaml +1 -1
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
- helm/benchmark/static/schema_instruction_following.yaml +210 -0
- helm/benchmark/static/schema_lite.yaml +824 -0
- helm/benchmark/static/schema_mmlu.yaml +1507 -0
- helm/benchmark/static/schema_unitxt.yaml +428 -0
- helm/benchmark/static/schema_vlm.yaml +576 -0
- helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
- helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
- helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
- helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
- helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
- helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
- helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
- helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
- helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
- helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
- helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
- helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
- helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
- helm/benchmark/static_build/assets/index-d839df55.js +9 -0
- helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
- helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
- helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
- helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
- helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
- helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
- helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
- helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
- helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
- helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
- helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
- helm/benchmark/static_build/config.js +4 -0
- helm/benchmark/static_build/index.html +20 -0
- helm/benchmark/test_data_preprocessor.py +3 -3
- helm/benchmark/test_model_deployment_definition.py +90 -0
- helm/benchmark/test_run_expander.py +1 -1
- helm/benchmark/tokenizer_config_registry.py +10 -14
- helm/benchmark/window_services/ai21_window_service.py +22 -33
- helm/benchmark/window_services/cohere_window_service.py +1 -63
- helm/benchmark/window_services/default_window_service.py +2 -35
- helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
- helm/benchmark/window_services/ice_window_service.py +0 -34
- helm/benchmark/window_services/image_generation/__init__.py +0 -0
- helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
- helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
- helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
- helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
- helm/benchmark/window_services/local_window_service.py +21 -4
- helm/benchmark/window_services/no_decoding_window_service.py +32 -0
- helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
- helm/benchmark/window_services/test_bloom_window_service.py +2 -1
- helm/benchmark/window_services/test_cohere_window_service.py +2 -1
- helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
- helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
- helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
- helm/benchmark/window_services/test_gptj_window_service.py +3 -2
- helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
- helm/benchmark/window_services/test_ice_window_service.py +2 -1
- helm/benchmark/window_services/test_openai_window_service.py +2 -1
- helm/benchmark/window_services/test_opt_window_service.py +3 -2
- helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
- helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
- helm/benchmark/window_services/test_t511b_window_service.py +2 -1
- helm/benchmark/window_services/test_ul2_window_service.py +2 -1
- helm/benchmark/window_services/test_utils.py +3 -2
- helm/benchmark/window_services/test_yalm_window_service.py +2 -1
- helm/benchmark/window_services/window_service.py +42 -0
- helm/benchmark/window_services/window_service_factory.py +24 -269
- helm/benchmark/window_services/yalm_window_service.py +0 -27
- helm/clients/__init__.py +0 -0
- helm/{proxy/clients → clients}/ai21_client.py +5 -12
- helm/clients/aleph_alpha_client.py +112 -0
- helm/{proxy/clients → clients}/anthropic_client.py +213 -24
- helm/clients/auto_client.py +215 -0
- helm/clients/bedrock_client.py +128 -0
- helm/clients/bedrock_utils.py +72 -0
- helm/{proxy/clients → clients}/client.py +67 -55
- helm/clients/clip_score_client.py +49 -0
- helm/clients/clip_scorers/__init__.py +0 -0
- helm/clients/clip_scorers/base_clip_scorer.py +18 -0
- helm/clients/clip_scorers/clip_scorer.py +50 -0
- helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
- helm/{proxy/clients → clients}/cohere_client.py +6 -17
- helm/clients/gcs_client.py +82 -0
- helm/{proxy/clients → clients}/google_client.py +7 -8
- helm/clients/google_translate_client.py +35 -0
- helm/{proxy/clients → clients}/http_model_client.py +6 -10
- helm/{proxy/clients → clients}/huggingface_client.py +134 -92
- helm/clients/image_generation/__init__.py +0 -0
- helm/clients/image_generation/adobe_vision_client.py +78 -0
- helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
- helm/clients/image_generation/cogview2/__init__.py +0 -0
- helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
- helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
- helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
- helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
- helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
- helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
- helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
- helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
- helm/clients/image_generation/cogview2_client.py +191 -0
- helm/clients/image_generation/dalle2_client.py +192 -0
- helm/clients/image_generation/dalle3_client.py +108 -0
- helm/clients/image_generation/dalle_mini/__init__.py +3 -0
- helm/clients/image_generation/dalle_mini/data.py +442 -0
- helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
- helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
- helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
- helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
- helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
- helm/clients/image_generation/dalle_mini/model/text.py +251 -0
- helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
- helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
- helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
- helm/clients/image_generation/dalle_mini_client.py +190 -0
- helm/clients/image_generation/deep_floyd_client.py +78 -0
- helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
- helm/clients/image_generation/image_generation_client_utils.py +9 -0
- helm/clients/image_generation/lexica_client.py +86 -0
- helm/clients/image_generation/mindalle/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/__init__.py +216 -0
- helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
- helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
- helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
- helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
- helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
- helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
- helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
- helm/clients/image_generation/mindalle/utils/config.py +129 -0
- helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
- helm/clients/image_generation/mindalle/utils/utils.py +89 -0
- helm/clients/image_generation/mindalle_client.py +115 -0
- helm/clients/image_generation/nudity_check_client.py +64 -0
- helm/clients/image_generation/together_image_generation_client.py +111 -0
- helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
- helm/{proxy/clients → clients}/megatron_client.py +13 -7
- helm/clients/mistral_client.py +134 -0
- helm/clients/moderation_api_client.py +109 -0
- helm/clients/open_lm_client.py +43 -0
- helm/clients/openai_client.py +302 -0
- helm/{proxy/clients → clients}/palmyra_client.py +15 -12
- helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
- helm/clients/simple_client.py +64 -0
- helm/{proxy/clients → clients}/test_auto_client.py +15 -15
- helm/clients/test_client.py +100 -0
- helm/clients/test_huggingface_client.py +70 -0
- helm/clients/test_simple_client.py +19 -0
- helm/{proxy/clients → clients}/test_together_client.py +23 -12
- helm/{proxy/clients → clients}/together_client.py +18 -71
- helm/clients/vertexai_client.py +391 -0
- helm/clients/vision_language/__init__.py +0 -0
- helm/clients/vision_language/huggingface_vlm_client.py +104 -0
- helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
- helm/clients/vision_language/open_flamingo/__init__.py +2 -0
- helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
- helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
- helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
- helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
- helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
- helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
- helm/clients/vision_language/open_flamingo_client.py +155 -0
- helm/clients/vision_language/qwen_vlm_client.py +171 -0
- helm/clients/vllm_client.py +46 -0
- helm/common/cache.py +24 -179
- helm/common/cache_backend_config.py +47 -0
- helm/common/clip_score_request.py +41 -0
- helm/common/concurrency.py +32 -0
- helm/common/credentials_utils.py +28 -0
- helm/common/file_caches/__init__.py +0 -0
- helm/common/file_caches/file_cache.py +16 -0
- helm/common/file_caches/local_file_cache.py +61 -0
- helm/common/file_caches/test_local_file_cache.py +25 -0
- helm/common/file_upload_request.py +27 -0
- helm/common/general.py +29 -10
- helm/common/image_generation_parameters.py +25 -0
- helm/common/images_utils.py +24 -1
- helm/common/key_value_store.py +113 -0
- helm/common/media_object.py +13 -0
- helm/common/moderations_api_request.py +71 -0
- helm/common/mongo_key_value_store.py +88 -0
- helm/common/multimodal_request_utils.py +31 -0
- helm/common/nudity_check_request.py +29 -0
- helm/common/object_spec.py +2 -2
- helm/common/request.py +36 -27
- helm/common/test_general.py +6 -0
- helm/common/tokenization_request.py +6 -3
- helm/config/__init__.py +0 -0
- helm/config/model_deployments.yaml +1942 -0
- helm/config/model_metadata.yaml +2201 -0
- helm/config/tokenizer_configs.yaml +362 -0
- helm/proxy/accounts.py +31 -4
- helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
- helm/proxy/critique/model_critique_client.py +13 -5
- helm/proxy/example_queries.py +29 -17
- helm/proxy/retry.py +8 -2
- helm/proxy/server.py +77 -5
- helm/proxy/services/remote_service.py +31 -0
- helm/proxy/services/server_service.py +103 -20
- helm/proxy/services/service.py +34 -2
- helm/proxy/services/test_remote_service.py +7 -6
- helm/proxy/services/test_service.py +27 -18
- helm/proxy/test_accounts.py +32 -0
- helm/proxy/token_counters/auto_token_counter.py +37 -37
- helm/proxy/token_counters/test_auto_token_counter.py +164 -0
- helm/proxy/token_counters/token_counter.py +3 -5
- helm/py.typed +0 -0
- helm/tokenizers/__init__.py +0 -0
- helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
- helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
- helm/tokenizers/auto_tokenizer.py +93 -0
- helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
- helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
- helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
- helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
- helm/tokenizers/simple_tokenizer.py +33 -0
- helm/tokenizers/test_anthropic_tokenizer.py +82 -0
- helm/tokenizers/test_huggingface_tokenizer.py +136 -0
- helm/tokenizers/test_simple_tokenizer.py +33 -0
- helm/tokenizers/vertexai_tokenizer.py +97 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
- helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
- helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
- helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
- crfm_helm-0.3.0.dist-info/RECORD +0 -396
- helm/benchmark/vlm_run_specs.py +0 -71
- helm/benchmark/window_services/anthropic_window_service.py +0 -68
- helm/benchmark/window_services/bloom_window_service.py +0 -35
- helm/benchmark/window_services/flan_t5_window_service.py +0 -29
- helm/benchmark/window_services/gpt2_window_service.py +0 -32
- helm/benchmark/window_services/gptj_window_service.py +0 -38
- helm/benchmark/window_services/gptneox_window_service.py +0 -41
- helm/benchmark/window_services/http_model_window_service.py +0 -28
- helm/benchmark/window_services/huggingface_window_service.py +0 -59
- helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
- helm/benchmark/window_services/llama_window_service.py +0 -28
- helm/benchmark/window_services/luminous_window_service.py +0 -67
- helm/benchmark/window_services/megatron_window_service.py +0 -10
- helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
- helm/benchmark/window_services/openai_window_service.py +0 -13
- helm/benchmark/window_services/opt_window_service.py +0 -35
- helm/benchmark/window_services/palmyra_window_service.py +0 -45
- helm/benchmark/window_services/remote_window_service.py +0 -48
- helm/benchmark/window_services/santacoder_window_service.py +0 -27
- helm/benchmark/window_services/starcoder_window_service.py +0 -27
- helm/benchmark/window_services/t0pp_window_service.py +0 -35
- helm/benchmark/window_services/t511b_window_service.py +0 -30
- helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
- helm/benchmark/window_services/ul2_window_service.py +0 -30
- helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
- helm/benchmark/window_services/wider_openai_window_service.py +0 -52
- helm/proxy/clients/aleph_alpha_client.py +0 -99
- helm/proxy/clients/auto_client.py +0 -461
- helm/proxy/clients/goose_ai_client.py +0 -100
- helm/proxy/clients/microsoft_client.py +0 -182
- helm/proxy/clients/openai_client.py +0 -206
- helm/proxy/clients/remote_model_registry.py +0 -28
- helm/proxy/clients/simple_client.py +0 -61
- helm/proxy/clients/test_anthropic_client.py +0 -63
- helm/proxy/clients/test_client.py +0 -31
- helm/proxy/clients/test_huggingface_client.py +0 -87
- helm/proxy/models.py +0 -963
- helm/proxy/test_models.py +0 -27
- helm/proxy/token_counters/ai21_token_counter.py +0 -20
- helm/proxy/token_counters/cohere_token_counter.py +0 -13
- helm/proxy/token_counters/free_token_counter.py +0 -12
- helm/proxy/token_counters/gooseai_token_counter.py +0 -24
- helm/proxy/token_counters/openai_token_counter.py +0 -22
- helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
- helm/proxy/token_counters/test_openai_token_counter.py +0 -79
- helm/proxy/tokenizers/simple_tokenizer.py +0 -32
- helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
- /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
- /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
- /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
- /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
- /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
- /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
- /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
- /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
- /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0

helm/clients/bedrock_client.py (new file)

```diff
@@ -0,0 +1,128 @@
+from abc import abstractmethod
+from copy import deepcopy
+import json
+import os
+from typing import Any, Dict, List, Mapping, Optional
+
+from helm.common.cache import CacheConfig
+from helm.clients.client import CachingClient, truncate_and_tokenize_response_text
+from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
+from helm.clients.bedrock_utils import get_bedrock_client
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+JSON_CONTENT_TYPE = "application/json"
+
+
+class BedrockClient(CachingClient):
+    @abstractmethod
+    def convert_request_to_raw_request(self, request: Request) -> Dict:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[GeneratedOutput]:
+        raise NotImplementedError()
+
+    def __init__(
+        self,
+        cache_config: CacheConfig,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        bedrock_model_id: Optional[str] = None,
+        assumed_role: Optional[str] = None,
+        region: Optional[str] = None,
+    ):
+        super().__init__(cache_config=cache_config)
+        self.tokenizer = tokenizer
+        self.tokenizer_name = tokenizer_name
+        self.bedrock_model_id = bedrock_model_id
+        self.bedrock_client = get_bedrock_client(
+            assumed_role=assumed_role or os.environ.get("BEDROCK_ASSUME_ROLE", None),
+            region=region or os.environ.get("AWS_DEFAULT_REGION", None),
+        )
+
+    def make_request(self, request: Request) -> RequestResult:
+        # model_id should be something like "amazon.titan-tg1-large"
+        model_id = self.bedrock_model_id if self.bedrock_model_id else request.model.replace("/", ".")
+        raw_request = self.convert_request_to_raw_request(request)
+
+        # modelId isn't part of raw_request, so it must be explicitly passed into the input to
+        raw_request_for_cache: Dict = {"modelId": model_id, **deepcopy(raw_request)}
+        cache_key: Mapping = CachingClient.make_cache_key(raw_request_for_cache, request)
+
+        def do_it() -> Dict[Any, Any]:
+            response = self.bedrock_client.invoke_model(
+                body=json.dumps(raw_request), modelId=model_id, accept=JSON_CONTENT_TYPE, contentType=JSON_CONTENT_TYPE
+            )
+            return json.loads(response.get("body").read())
+
+        try:
+            response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+        except Exception as error:
+            return RequestResult(
+                success=False,
+                cached=False,
+                error=str(error),
+                completions=[],
+                embedding=[],
+            )
+
+        completions = self.convert_raw_response_to_completions(response, request)
+
+        return RequestResult(
+            success=True,
+            cached=cached,
+            request_time=response["request_time"],
+            request_datetime=response["request_datetime"],
+            completions=completions,
+            embedding=[],
+        )
+
+
+class BedrockTitanClient(BedrockClient):
+    _COMPLETION_REASON_TO_FINISH_REASON = {
+        "LENGTH": "length",
+        "FINISH": "endoftext",
+    }
+
+    def convert_request_to_raw_request(self, request: Request) -> Dict:
+        # TODO: Support the following:
+        # - top_k_per_token
+        # - echo_prompt
+        # - num_completions
+        return {
+            "inputText": request.prompt,
+            "textGenerationConfig": {
+                "maxTokenCount": request.max_tokens,
+                # We ignore stop sequences in the request and always set stop sequences to the empty list.
+                # This is because:
+                #
+                # 1. The only permitted stop sequences are "|" and "User:"
+                #    - https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-text.html
+                #    - https://github.com/boto/boto3/issues/3993
+                #    - https://github.com/aws/aws-sdk/issues/692
+                #
+                # 2. Titan has the tendency to emit "\n" as the first token in the generated text output,
+                #    which would cause the output to stop immediately if "\n" is in the stop_sequences.
+                "stopSequences": [],
+                "temperature": request.temperature,
+                "topP": request.top_p,
+            },
+        }
+
+    def convert_raw_response_to_completions(self, response: Dict, request: Request) -> List[GeneratedOutput]:
+        # TODO: Support the following:
+        # - tokens
+        # - logprob
+        completions: List[GeneratedOutput] = []
+        for raw_completion in response["results"]:
+            output_text = raw_completion["outputText"]
+            # Call lstrip() Titan has the tendency to emit "\n" as the first token in the generated text output.
+            finish_reason = BedrockTitanClient._COMPLETION_REASON_TO_FINISH_REASON.get(
+                raw_completion["completionReason"], raw_completion["completionReason"].lower()
+            )
+            completion = truncate_and_tokenize_response_text(
+                output_text.lstrip(), request, self.tokenizer, self.tokenizer_name, finish_reason
+            )
+            completions.append(completion)
+        return completions
```
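For orientation (not part of the packaged diff): the payload built by `BedrockTitanClient.convert_request_to_raw_request` is the standard Titan text-generation body, and `make_request` sends it through boto3's `invoke_model`. A minimal standalone sketch of the same call, assuming AWS credentials are already configured; the prompt and generation settings are hypothetical:

```python
import json

import boto3

# Illustrative sketch only: mirrors the request shape used by BedrockTitanClient above.
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")
body = {
    "inputText": "Summarize the HELM benchmark in one sentence.",  # hypothetical prompt
    "textGenerationConfig": {"maxTokenCount": 50, "stopSequences": [], "temperature": 0.7, "topP": 0.9},
}
# Same invoke_model call shape as BedrockClient.make_request.
response = bedrock_runtime.invoke_model(
    body=json.dumps(body),
    modelId="amazon.titan-tg1-large",
    accept="application/json",
    contentType="application/json",
)
print(json.loads(response["body"].read())["results"][0]["outputText"])
```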

helm/clients/bedrock_utils.py (new file)

```diff
@@ -0,0 +1,72 @@
+"""Helper utilities for working with Amazon Bedrock."""
+
+import os
+from typing import Optional
+
+from helm.common.hierarchical_logger import hlog
+from helm.common.optional_dependencies import handle_module_not_found_error
+
+try:
+    import boto3
+    from botocore.config import Config
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["aws"])
+
+
+# From https://github.com/aws-samples/amazon-bedrock-workshop/blob/main/01_Generation/00_generate_w_bedrock.ipynb
+# MIT-0 Licensed
+def get_bedrock_client(
+    assumed_role: Optional[str] = None,
+    region: Optional[str] = None,
+    runtime: Optional[bool] = True,
+):
+    """Create a boto3 client for Amazon Bedrock, with optional configuration overrides
+
+    Parameters
+    ----------
+    assumed_role :
+        Optional ARN of an AWS IAM role to assume for calling the Bedrock service. If not
+        specified, the current active credentials will be used.
+    region :
+        Optional name of the AWS Region in which the service should be called (e.g. "us-east-1").
+        If not specified, AWS_REGION or AWS_DEFAULT_REGION environment variable will be used.
+    runtime :
+        Optional choice of getting different client to perform operations with the Amazon Bedrock service.
+    """
+    if region is None:
+        target_region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION"))
+    else:
+        target_region = region
+
+    session_kwargs = {"region_name": target_region}
+    client_kwargs = {**session_kwargs}
+
+    profile_name = os.environ.get("AWS_PROFILE")
+    if profile_name:
+        session_kwargs["profile_name"] = profile_name
+
+    retry_config = Config(
+        region_name=target_region,
+        retries={
+            "max_attempts": 10,
+            "mode": "standard",
+        },
+    )
+    session = boto3.Session(**session_kwargs)
+
+    if assumed_role:
+        sts = session.client("sts")
+        response = sts.assume_role(RoleArn=str(assumed_role), RoleSessionName="crfm-helm")
+        client_kwargs["aws_access_key_id"] = response["Credentials"]["AccessKeyId"]
+        client_kwargs["aws_secret_access_key"] = response["Credentials"]["SecretAccessKey"]
+        client_kwargs["aws_session_token"] = response["Credentials"]["SessionToken"]
+
+    if runtime:
+        service_name = "bedrock-runtime"
+    else:
+        service_name = "bedrock"
+
+    bedrock_client = session.client(service_name=service_name, config=retry_config, **client_kwargs)
+
+    hlog(f"Amazon Bedrock client successfully created with endpoint {bedrock_client._endpoint}")
+    return bedrock_client
```
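A usage sketch for the new helper (not part of the diff), assuming the AWS extra (boto3, botocore) is installed; the role ARN is a placeholder:

```python
from helm.clients.bedrock_utils import get_bedrock_client

# Create a "bedrock-runtime" client for model invocation; pass runtime=False for the control-plane "bedrock" client.
client = get_bedrock_client(
    assumed_role="arn:aws:iam::123456789012:role/BedrockCaller",  # placeholder ARN; omit to use the active credentials
    region="us-east-1",
)
```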

helm/{proxy/clients → clients}/client.py

```diff
@@ -1,49 +1,16 @@
 import json
 from abc import ABC, abstractmethod
-from typing import
+from typing import List, Mapping, Optional, cast
 
 from helm.common.hierarchical_logger import hlog
 from helm.common.media_object import MultimediaObject, TEXT_TYPE
-from helm.common.request import Request, RequestResult,
-from helm.common.tokenization_request import (
-    TokenizationRequest,
-    TokenizationRequestResult,
-    DecodeRequest,
-    DecodeRequestResult,
-)
+from helm.common.request import Request, RequestResult, GeneratedOutput, Token
 from helm.common.cache import Cache, CacheConfig
-from helm.
+from helm.common.tokenization_request import DecodeRequest, TokenizationRequest
+from helm.tokenizers.tokenizer import Tokenizer
 
 
 class Client(ABC):
-    # TODO: This method should be removed.
-    # This only kept for the AutoClient. Eventually, we should introduce an
-    # AutoTokenizer or TokenizerFactory class.
-    @abstractmethod
-    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
-        """Tokenizes `request.text` using `request.tokenizer`.
-
-        This simply calls the `tokenize` method of the tokenizer.
-        Some exceptions can be made (but should be avoided).
-        This is the case for the auto client, which needs to handle
-        tokenization for multiple tokenizers.
-        """
-        pass
-
-    # TODO: This method should be removed.
-    # This only kept for the AutoClient. Eventually, we should introduce an
-    # AutoTokenizer or TokenizerFactory class.
-    @abstractmethod
-    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        """Decodes `request.tokens` using `request.tokenizer`.
-
-        This simply calls the `decode` method of the tokenizer.
-        Some exceptions can be made (but should be avoided).
-        This is the case for the auto client, which needs to handle
-        tokenization for multiple tokenizers.
-        """
-        pass
-
     @abstractmethod
     def make_request(self, request: Request) -> RequestResult:
         """Makes a request to the model.
@@ -54,7 +21,7 @@ class Client(ABC):
 
 
 class CachingClient(Client):
-    def __init__(self, cache_config: CacheConfig
+    def __init__(self, cache_config: CacheConfig) -> None:
         """Initializes the client.
 
         For most clients, both the cache config and tokenizer are required.
@@ -63,37 +30,30 @@ class CachingClient(Client):
         the request is made.
         """
         self.cache = Cache(cache_config) if cache_config is not None else None
-        self.tokenizer = tokenizer
 
     @staticmethod
-    def make_cache_key(raw_request:
+    def make_cache_key(raw_request: Mapping, request: Request) -> Mapping:
        """
        Construct the key for the cache using the raw request.
        Add `request.random` to the key, if defined.
        """
        if request.random is not None:
            assert "random" not in raw_request
-           cache_key = {**raw_request, "random": request.random}
+           cache_key: Mapping = {**raw_request, "random": request.random}
        else:
            cache_key = raw_request
        return cache_key
 
-    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
-        # Deprecated - use `self.tokenizer.tokenize` instead. Warn the user.
-        hlog("WARNING: CachingClient.tokenize is deprecated, use self.tokenizer.tokenize instead")
-        return self.tokenizer.tokenize(request)
-
-    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        # Deprecated - use `self.tokenizer.decode` instead. Warn the user.
-        hlog("WARNING: CachingClient.decode is deprecated, use self.tokenizer.decode instead")
-        return self.tokenizer.decode(request)
-
 
-def truncate_sequence(sequence:
+def truncate_sequence(sequence: GeneratedOutput, request: Request, print_warning: bool = True) -> GeneratedOutput:
     """
     Certain providers have bugs where they aren't respecting max_tokens,
     stop_sequences and the end of text token, so as a hack, we have to manually
     truncate the suffix of `sequence` and `tokens` as a post-hoc process.
+
+    This method is unsafe and may produce warnings or incorrect results.
+    Prefer using the safer truncate_and_tokenize_response_text() method instead
+    if your use case satisfies its requirements.
     """
     # TODO: if echo_prompt, then we should only ignore the prompt, but we don't
     # know how many tokens the prompt takes up.
@@ -133,7 +93,7 @@ def truncate_sequence(sequence: Sequence, request: Request, print_warning: bool
         if print_warning:
             hlog(f"WARNING: truncate_sequence needs to strip {json.dumps(stop)}")
 
-        sequence =
+        sequence = GeneratedOutput(text=new_text, logprob=new_logprob, tokens=new_tokens)
 
     # Truncate based on the max number of tokens.
     if len(sequence.tokens) > request.max_tokens:
@@ -150,11 +110,63 @@ def truncate_sequence(sequence: Sequence, request: Request, print_warning: bool
 
         new_logprob = sum(token.logprob for token in new_tokens)
 
-        sequence =
+        sequence = GeneratedOutput(text=new_text, logprob=new_logprob, tokens=new_tokens)
 
     return sequence
 
 
+def truncate_and_tokenize_response_text(
+    text: str, request: Request, tokenizer: Tokenizer, tokenizer_name: str, original_finish_reason: str = "endoftext"
+) -> GeneratedOutput:
+    """Truncate a string-only response to respect stop_sequences and max_tokens.
+
+    This can only be used if all of the following conditions are true:
+
+    - You have access to the tokenizer.
+    - The request has echo_prompt = False.
+    - The tokenizer supports encoding and decoding.
+    - The tokenizer's tokenize() method supports truncation.
+    - The model's response is text-only.
+    - The model's response not already provide the tokenized text.
+    - The model's response does not provide logprobs.
+
+    This method is safer than truncate_sequence() and should be preferred if the above conditions are met.
+    Unlike truncate_sequence(), this method will not produce warnings or incorrect results.
+    This is because the the tokens are derived from the truncated text using the tokenizer,
+    so the text and the tokens in the resulting result are guranteed to match."""
+    # Finish reason strings are token from basic_metrics._compute_finish_reason_metrics()
+    finish_reason: str = original_finish_reason
+    if request.echo_prompt:
+        raise Exception("truncate_and_tokenize_response_text() does not support requests with echo_prompt = True")
+
+    for stop_sequence in request.stop_sequences:
+        try:
+            text = text[: text.index(stop_sequence)]
+            finish_reason = "stop"
+        except ValueError:
+            pass
+
+    token_strings = cast(
+        List[str], tokenizer.tokenize(TokenizationRequest(text=text, tokenizer=tokenizer_name)).raw_tokens
+    )
+    if len(token_strings) > request.max_tokens:
+        encoded_ints = cast(
+            List[int],
+            tokenizer.tokenize(
+                TokenizationRequest(
+                    text=text, tokenizer=tokenizer_name, encode=True, truncation=True, max_length=request.max_tokens
+                )
+            ).raw_tokens,
+        )
+        text = tokenizer.decode(DecodeRequest(encoded_ints, tokenizer_name)).text
+        token_strings = cast(
+            List[str], tokenizer.tokenize(TokenizationRequest(text=text, tokenizer=tokenizer_name)).raw_tokens
+        )
+        finish_reason = "length"
+    tokens = [Token(text=token_string, logprob=0.0) for token_string in token_strings]
+    return GeneratedOutput(text=text, logprob=0.0, tokens=tokens, finish_reason={"reason": finish_reason})
+
+
 def cleanup_str(token: str, tokenizer_name: Optional[str] = None) -> str:
     """
     Certain tokenizers introduce special characters to represent spaces, such as
@@ -171,7 +183,7 @@ def cleanup_str(token: str, tokenizer_name: Optional[str] = None) -> str:
         "together",
     ]:
         return token.replace("▁", " ")
-    elif tokenizer_name is not None and tokenizer_name.startswith("huggingface"):
+    elif tokenizer_name is not None and (tokenizer_name.startswith("huggingface") or tokenizer_name.endswith("gpt2")):
         return token.replace("Ġ", " ")
     return token
 
```
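To make the control flow of the new `truncate_and_tokenize_response_text()` easier to follow (an illustration, not HELM code): it first cuts the text at the stop sequences, then enforces the token budget by re-encoding with truncation. A toy sketch of those two steps, using whitespace "tokens" instead of a real HELM tokenizer:

```python
from typing import List, Tuple


def truncate_plain_text(text: str, stop_sequences: List[str], max_tokens: int) -> Tuple[str, str]:
    """Toy illustration of the two truncation steps: stop sequences first, then the token budget."""
    finish_reason = "endoftext"
    for stop in stop_sequences:
        if stop in text:
            text = text[: text.index(stop)]
            finish_reason = "stop"
    tokens = text.split()  # stand-in for tokenizer.tokenize(...)
    if len(tokens) > max_tokens:
        tokens = tokens[:max_tokens]
        text = " ".join(tokens)  # stand-in for tokenizer.decode(...)
        finish_reason = "length"
    return text, finish_reason


print(truncate_plain_text("one two three four User: five", ["User:"], 3))  # ('one two three', 'length')
```

The real implementation instead round-trips through `tokenizer.tokenize(..., truncation=True)` and `tokenizer.decode(...)`, which is what keeps the returned text and tokens in agreement.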

helm/clients/clip_score_client.py (new file)

```diff
@@ -0,0 +1,49 @@
+from typing import Dict, Optional
+from dataclasses import asdict
+
+from helm.common.cache import Cache, CacheConfig
+from helm.common.clip_score_request import DEFAULT_CLIP_SCORE_MODEL, CLIPScoreRequest, CLIPScoreResult
+from helm.clients.clip_scorers.base_clip_scorer import BaseCLIPScorer
+
+
+class CLIPScoreClientError(Exception):
+    pass
+
+
+class CLIPScoreClient:
+    def __init__(self, cache_config: CacheConfig):
+        self.cache = Cache(cache_config)
+        self._clip_scorer: Optional[BaseCLIPScorer] = None
+
+    def compute_score(self, request: CLIPScoreRequest) -> CLIPScoreResult:
+        """
+        Compute a CLIPScore for a given caption and image.
+        """
+        # TODO: support multilingual CLIPScore and other CLIP models.
+        assert request.model == DEFAULT_CLIP_SCORE_MODEL, f"Unsupported model: {request.model}"
+        assert not request.multilingual
+
+        try:
+
+            def do_it():
+                if self._clip_scorer is None:
+                    from helm.clients.clip_scorers.clip_scorer import CLIPScorer
+
+                    self._clip_scorer = CLIPScorer()
+
+                score: float = self._clip_scorer.compute_score(
+                    caption=request.caption, image_location=request.image_location
+                )
+                return {"score": score}
+
+            cache_key: Dict = asdict(request)
+            results, cached = self.cache.get(cache_key, do_it)
+
+        except Exception as e:
+            raise CLIPScoreClientError(e)
+
+        return CLIPScoreResult(
+            success=True,
+            cached=cached,
+            score=results["score"],
+        )
```

helm/clients/clip_scorers/__init__.py (file without changes)

helm/clients/clip_scorers/base_clip_scorer.py (new file)

```diff
@@ -0,0 +1,18 @@
+from abc import abstractmethod, ABC
+from typing import List
+
+
+class BaseCLIPScorer(ABC):
+    @abstractmethod
+    def compute_score(self, caption: str, image_location: str) -> float:
+        pass
+
+    def select_best_image(self, caption: str, image_locations: List[str]) -> str:
+        """Selects the image from a list of images with the highest CLIPScore given the caption."""
+        assert len(image_locations) > 0, "Need at least one image"
+
+        if len(image_locations) == 1:
+            return image_locations[0]
+
+        scores: List[float] = [self.compute_score(caption, image_location) for image_location in image_locations]
+        return image_locations[scores.index(max(scores))]
```
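Not part of the diff: a minimal sketch of how a concrete scorer plugs into `select_best_image`, with a dummy `compute_score` so it runs without any CLIP dependencies (the class name and scoring rule are made up for illustration):

```python
from helm.clients.clip_scorers.base_clip_scorer import BaseCLIPScorer


class FileNameLengthScorer(BaseCLIPScorer):
    """Toy scorer: 'scores' an image by the length of its file path (illustration only)."""

    def compute_score(self, caption: str, image_location: str) -> float:
        return float(len(image_location))


scorer = FileNameLengthScorer()
best = scorer.select_best_image("a cat", ["cat1.png", "cat_on_sofa.png"])
print(best)  # cat_on_sofa.png: select_best_image returns the highest-scoring image location
```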

helm/clients/clip_scorers/clip_scorer.py (new file)

```diff
@@ -0,0 +1,50 @@
+from typing import Literal
+
+from torchvision import transforms
+import torch
+
+from helm.common.gpu_utils import get_torch_device
+from helm.common.images_utils import open_image
+from helm.common.optional_dependencies import handle_module_not_found_error
+from .base_clip_scorer import BaseCLIPScorer
+
+
+_ = torch.manual_seed(42)
+
+
+class CLIPScorer(BaseCLIPScorer):
+    """
+    CLIPScore is a reference free metric that can be used to evaluate the correlation between an image
+    caption and the content of the image. It has been found to be highly correlated with human judgement.
+    Paper: https://arxiv.org/abs/2104.08718
+
+    We use the TorchMetrics implementation:
+    https://torchmetrics.readthedocs.io/en/stable/multimodal/clip_score.html.
+    The score is bound between 0 and 100, where a score closer to 100 is better.
+
+    Verified implementation against the scores of image-caption pairs from
+    https://wandb.ai/dalle-mini/dalle-mini/reports/OpenAI-CLIP-Score-exploration--VmlldzoxNjMwODM1.
+    """
+
+    def __init__(
+        self,
+        model_name: Literal[
+            "openai/clip-vit-base-patch16",
+            "openai/clip-vit-base-patch32",
+            "openai/clip-vit-large-patch14-336",
+            "openai/clip-vit-large-patch14",
+        ] = "openai/clip-vit-large-patch14",
+    ):
+        try:
+            from torchmetrics.multimodal import CLIPScore
+        except ModuleNotFoundError as e:
+            handle_module_not_found_error(e, ["heim"])
+
+        self._device: torch.device = get_torch_device()
+        self._metric = CLIPScore(model_name_or_path=model_name).to(self._device)
+
+    def compute_score(self, caption: str, image_location: str) -> float:
+        image = open_image(image_location)
+        image_tensor: torch.Tensor = transforms.ToTensor()(image).to(self._device)
+        score: float = self._metric(image_tensor, caption).detach().item()
+        return score
```
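A usage sketch (not in the diff), assuming the heim extras (torchmetrics, torchvision, and related dependencies) are installed; `cat.png` is a hypothetical local image path:

```python
from helm.clients.clip_scorers.clip_scorer import CLIPScorer

scorer = CLIPScorer()  # defaults to "openai/clip-vit-large-patch14"
score = scorer.compute_score(caption="a photo of a cat", image_location="cat.png")  # hypothetical path
print(f"CLIPScore: {score:.1f}")  # bounded between 0 and 100; higher means a better caption-image match
```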

helm/clients/clip_scorers/multilingual_clip_scorer.py (new file)

```diff
@@ -0,0 +1,50 @@
+import torch
+import transformers
+
+from helm.common.gpu_utils import get_torch_device, get_torch_device_name
+from helm.common.images_utils import open_image
+from helm.common.optional_dependencies import handle_module_not_found_error
+from .base_clip_scorer import BaseCLIPScorer
+
+_ = torch.manual_seed(42)
+
+
+class MultilingualCLIPScorer(BaseCLIPScorer):
+    """
+    Multilingual-CLIP extends OpenAI's English text encoders to multiple other languages.
+    Adapted from https://huggingface.co/M-CLIP/XLM-Roberta-Large-Vit-L-14
+    """
+
+    TEXT_MODEL_NAME: str = "M-CLIP/XLM-Roberta-Large-Vit-L-14"
+    IMAGE_MODEL_NAME: str = "ViT-L/14"
+
+    def __init__(self):
+        try:
+            import clip
+            from multilingual_clip import pt_multilingual_clip
+        except ModuleNotFoundError as e:
+            handle_module_not_found_error(e, ["heim"])
+
+        super().__init__()
+        self._device: torch.device = get_torch_device()
+        self._text_model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(self.TEXT_MODEL_NAME)
+        self._tokenizer = transformers.AutoTokenizer.from_pretrained(self.TEXT_MODEL_NAME)
+        self._model, self._preprocess = clip.load(self.IMAGE_MODEL_NAME, device=get_torch_device_name())
+
+    def compute_score(self, caption: str, image_location: str) -> float:
+        # Get text features
+        text_features = self._text_model.forward(caption, self._tokenizer)
+        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)
+        text_features = text_features.to(self._device)
+
+        image = open_image(image_location)
+        image = self._preprocess(image).unsqueeze(0).to(self._device)
+
+        # Get image features
+        with torch.no_grad():
+            image_features = self._model.encode_image(image)
+        image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
+
+        # Compute score using text and image features
+        score = 100 * (image_features * text_features).sum(axis=-1)
+        return score.detach().item()
```

helm/{proxy/clients → clients}/cohere_client.py

```diff
@@ -8,11 +8,9 @@ from helm.common.request import (
     EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
     Request,
     RequestResult,
-
+    GeneratedOutput,
     Token,
 )
-from helm.proxy.models import get_models_by_organization
-from helm.proxy.tokenizers.tokenizer import Tokenizer
 from .client import CachingClient, truncate_sequence
 from .cohere_utils import get_cohere_url, DEFAULT_COHERE_API_VERSION
 
@@ -21,8 +19,8 @@ class CohereClient(CachingClient):
     ORGANIZATION: str = "cohere"
     GENERATE_ENDPOINT: str = "generate"
 
-    def __init__(self, api_key: str,
-        super().__init__(cache_config=cache_config
+    def __init__(self, api_key: str, cache_config: CacheConfig):
+        super().__init__(cache_config=cache_config)
         self.api_key: str = api_key
 
     def make_request(self, request: Request) -> RequestResult:
@@ -44,8 +42,6 @@ class CohereClient(CachingClient):
         # so `max_tokens` has to be greater than 0 when `return_likelihoods` is set to "GENERATION".
         assert request.max_tokens > 0, "max_tokens can only be 0 if echo_prompt=True"
 
-        # model: "Currently available models are small, medium, large, xlarge"
-        assert request.model in get_models_by_organization("cohere")
         # temperature: "min value of 0.0, max value of 5.0"
         assert 0.0 <= request.temperature <= 5.0, f"Invalid temperature: {request.temperature}. Valid range: [0,5]"
         # num_generations: "min value of 1, max value of 5"
@@ -124,7 +120,7 @@ class CohereClient(CachingClient):
             error: str = f"CohereClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
-        completions: List[
+        completions: List[GeneratedOutput] = []
         for generation in response["generations"]:
             # From https://docs.cohere.ai/generate-reference, "the likelihood refers to the average log-likelihood
             # of the entire specified string..." What we want is the sum of the log probabilities of all tokens.
@@ -136,14 +132,7 @@ class CohereClient(CachingClient):
                 logprob: float = token_likelihood.get("likelihood", 0)
                 sequence_logprob += logprob
 
-                tokens.append(
-                    Token(
-                        text=token_likelihood["token"],
-                        logprob=logprob,
-                        # Cohere does not include the top log probs in the response
-                        top_logprobs={},
-                    )
-                )
+                tokens.append(Token(text=token_likelihood["token"], logprob=logprob))
 
             sequence_text: str = generation["text"]
             if request.echo_prompt and request.max_tokens > 0:
@@ -151,7 +140,7 @@ class CohereClient(CachingClient):
                 # `return_likelihoods` is "ALL" and `max_tokens` is greater than 0.
                 sequence_text = request.prompt + sequence_text
 
-            completion:
+            completion: GeneratedOutput = GeneratedOutput(text=sequence_text, logprob=sequence_logprob, tokens=tokens)
             completion = truncate_sequence(completion, request)
             completions.append(completion)
 
```