crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -0,0 +1,120 @@
1
+ # -*- encoding: utf-8 -*-
2
+ """
3
+ @File : itersr_sampling.py
4
+ @Time : 2022/03/03 14:24:28
5
+ @Author : Ming Ding
6
+ @Contact : dm18@mails.tsinghua.edu.cn
7
+ """
8
+
9
+ # here put the import lib
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from icetk import icetk as tokenizer
13
+
14
+
15
+ def top_k_logits_(logits, top_k=0, filter_value=-float("Inf")):
16
+ indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
17
+ logits[indices_to_remove] = filter_value
18
+ return logits
19
+
20
+
21
+ class IterativeEntfilterStrategy:
22
+ def __init__(self, invalid_slices=[], temperature=1.0, topk=10):
23
+ self.invalid_slices = invalid_slices
24
+ self.temperature = temperature
25
+ self.topk = topk
26
+
27
+ def forward(self, logits, tokens, temperature=None, entfilter=None, filter_topk=5, temperature2=None):
28
+ # In interative strategy, logits are of shape [batch_size, seq_length, hidden_size]
29
+ if temperature is None:
30
+ temperature = self.temperature
31
+
32
+ logits = logits.float() / temperature
33
+ for invalid_slice in self.invalid_slices:
34
+ logits[..., invalid_slice] = -float("Inf")
35
+
36
+ # debiased topk
37
+ # probs = F.softmax(logits, dim=-1)
38
+ # tk_value, tk_idx = torch.topk(probs, self.topk, dim=-1)
39
+ # pred = torch.multinomial(probs.view(-1, logits.shape[-1]), num_samples=1).view(*logits.shape[:2], 1)
40
+ # edge_idx = tk_idx[:, :, -1:]
41
+ # edge_value = tk_value[:, :, -1:]
42
+ # edge_mask = probs.gather(dim=-1, index=pred) < edge_value
43
+ # pred[edge_mask] = edge_idx[edge_mask] # replace outliers as the "filter_topk"-th token
44
+ # pred.squeeze_(-1) # [batch_size, seq_length]
45
+
46
+ top_k_logits_(logits, self.topk)
47
+ probs = F.softmax(logits, dim=-1)
48
+ pred = torch.multinomial(probs.view(-1, logits.shape[-1]), num_samples=1).view(*logits.shape[:2], 1)
49
+ pred.squeeze_(-1)
50
+
51
+ assert tokens.shape[1] == pred.shape[1]
52
+ tokens = pred
53
+ return tokens
54
+
55
+
56
+ def filling_sequence_itersr(
57
+ model,
58
+ seq0,
59
+ seq1,
60
+ warmup_steps=3,
61
+ block_hw=(4, 4),
62
+ strategy=IterativeEntfilterStrategy(topk=10),
63
+ ):
64
+ """
65
+ seq: [PAD]... [ROI1] text ... [BOI1] {layout[0]} 1024 {layout[1]} [EOI1]
66
+ 4095 {layout[2]} final_token.
67
+ Attention:
68
+ The sampling temperature are changing, temporally we hard code them here.
69
+ The temperature in the strategy is not used.
70
+ """
71
+ assert hasattr(model, "layout")
72
+ layout = model.layout
73
+
74
+ device = seq0.device
75
+ # concat and pad sequences
76
+ batch_size = seq0.shape[0]
77
+ n_pad = layout[0] - seq0.shape[1]
78
+ assert n_pad >= 0, "You should truncate long input before filling."
79
+ seq = torch.cat(
80
+ (torch.tensor([0] * n_pad, device=device, dtype=seq0.dtype).unsqueeze(0).expand(batch_size, n_pad), seq0, seq1),
81
+ dim=1,
82
+ ) # [b, layout[-1]+1]
83
+ assert seq.shape[1] == layout[-1]
84
+
85
+ # build initial tokens, attention_mask, and position_ids
86
+ tokens = seq.clone()
87
+ attention_mask = torch.ones(layout[0]).to(device)
88
+ attention_mask[:n_pad] = 0
89
+ attention_mask = attention_mask.unsqueeze(0).type_as(next(model.parameters())) # if fp16
90
+ position_ids = torch.cat(
91
+ (
92
+ torch.zeros(n_pad, dtype=torch.long),
93
+ torch.arange(0, layout[0] - n_pad),
94
+ torch.arange(1024, 1024 + layout[1] - layout[0]),
95
+ )
96
+ ).to(device)
97
+ log_attention_weights = torch.zeros(layout[0], device=device).type_as(next(model.parameters()))
98
+ log_attention_weights[n_pad : layout[0]] = 0.0
99
+ log_attention_weights = log_attention_weights.unsqueeze(0)
100
+
101
+ # prepare for interation
102
+ unfixed = tokens == tokenizer["<start_of_image>"]
103
+ ll, rr = block_hw
104
+ # edge_len = int(math.sqrt(layout[-1] - layout[-2]) + 1e-4)
105
+ num_steps = 1
106
+ # interative refining
107
+
108
+ # unfixed[..., -(layout[-1] - layout[-2]):].view(
109
+ # batch_size, edge_len//ll, ll, edge_len//rr, rr)[:, :, :, :, -1] = False
110
+
111
+ ret = []
112
+ # ret.append(tokens[:, layout[-2]:-1].clone())
113
+ for step_cnt in range(1, num_steps + 1):
114
+ logits, *_dump = model(tokens, position_ids, attention_mask, log_attention_weights=log_attention_weights)
115
+ real_temp = 1.0
116
+ new_tokens = strategy.forward(logits, tokens, real_temp)
117
+ tokens[unfixed] = new_tokens[unfixed]
118
+
119
+ ret.append(tokens[:, layout[-2] :].clone())
120
+ return torch.cat(ret, dim=0)
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+ """
3
+ @File : sr_group.py
4
+ @Time : 2022/04/02 01:17:21
5
+ @Author : Ming Ding
6
+ @Contact : dm18@mails.tsinghua.edu.cn
7
+ """
8
+
9
+ # here put the import lib
10
+ from .direct_sr import DirectSuperResolution
11
+ from .iterative_sr import IterativeSuperResolution
12
+
13
+ from helm.common.optional_dependencies import handle_module_not_found_error
14
+
15
+
16
+ class SRGroup:
17
+ def __init__(
18
+ self,
19
+ args,
20
+ home_path=None,
21
+ ):
22
+ try:
23
+ from SwissArmyTransformer.resources import auto_create
24
+ except ModuleNotFoundError as e:
25
+ handle_module_not_found_error(e, ["heim"])
26
+
27
+ dsr_path = auto_create("cogview2-dsr", path=home_path)
28
+ itersr_path = auto_create("cogview2-itersr", path=home_path)
29
+ dsr = DirectSuperResolution(args, dsr_path)
30
+ itersr = IterativeSuperResolution(args, itersr_path, shared_transformer=dsr.model.transformer)
31
+ self.dsr = dsr
32
+ self.itersr = itersr
33
+
34
+ def sr_base(self, img_tokens, txt_tokens):
35
+ assert img_tokens.shape[-1] == 400 and len(img_tokens.shape) == 2
36
+ batch_size = img_tokens.shape[0]
37
+ txt_len = txt_tokens.shape[-1]
38
+ if len(txt_tokens.shape) == 1:
39
+ txt_tokens = txt_tokens.unsqueeze(0).expand(batch_size, txt_len)
40
+ sred_tokens = self.dsr(txt_tokens, img_tokens)
41
+ iter_tokens = self.itersr(txt_tokens, sred_tokens[:, -3600:].clone())
42
+ return iter_tokens[-batch_size:]
@@ -0,0 +1,191 @@
1
+ import os
2
+ import argparse
3
+ from functools import partial
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import torch
7
+ from icetk import icetk as tokenizer
8
+ from torchvision.utils import save_image
9
+
10
+ from helm.common.cache import CacheConfig, Cache
11
+ from helm.common.file_caches.file_cache import FileCache
12
+ from helm.common.hierarchical_logger import hlog, htrack_block
13
+ from helm.common.optional_dependencies import handle_module_not_found_error
14
+ from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
15
+ from helm.common.tokenization_request import (
16
+ DecodeRequest,
17
+ DecodeRequestResult,
18
+ TokenizationRequest,
19
+ TokenizationRequestResult,
20
+ )
21
+ from helm.clients.client import Client, CachingClient
22
+ from helm.clients.image_generation.cogview2.coglm_strategy import CoglmStrategy
23
+ from .image_generation_client_utils import get_single_image_multimedia_object
24
+
25
+
26
+ class CogView2Client(Client):
27
+ """
28
+ https://github.com/THUDM/CogView2
29
+ """
30
+
31
+ MAX_SEQ_LEN: int = 95
32
+ MODEL_URL: str = "https://nlp.stanford.edu/projects/vhelm/cogview2/sharefs.zip"
33
+
34
+ def __init__(self, cache_config: CacheConfig, file_cache: FileCache):
35
+ self._cache = Cache(cache_config)
36
+ self._file_cache: FileCache = file_cache
37
+
38
+ self._args: Optional[argparse.Namespace] = None
39
+ self._strategy: Optional[CoglmStrategy] = None
40
+ self._model = None
41
+ self._srg = None
42
+
43
+ def _get_model(self) -> None:
44
+ try:
45
+ from SwissArmyTransformer import get_args
46
+ from helm.clients.image_generation.cogview2.coglm_utils import (
47
+ get_recipe,
48
+ InferenceModel,
49
+ )
50
+ from helm.clients.image_generation.cogview2.sr_pipeline import SRGroup
51
+ except ModuleNotFoundError as e:
52
+ handle_module_not_found_error(e, ["heim"])
53
+
54
+ tokenizer.add_special_tokens(["<start_of_image>", "<start_of_english>", "<start_of_chinese>"])
55
+
56
+ model_local_path: str = f"{self._file_cache._location}/cogview2" # type: ignore
57
+ os.environ["SAT_HOME"] = f"{model_local_path}/sharefs/cogview-new"
58
+
59
+ # Download the model if not yet
60
+ if not os.path.exists(model_local_path):
61
+ os.system(f"mkdir -p {model_local_path}")
62
+ os.system(f"wget {self.MODEL_URL} -P {model_local_path}")
63
+ os.system(f"unzip {model_local_path}/sharefs.zip -d {model_local_path}")
64
+
65
+ if self._model is None:
66
+ # Set up args
67
+ args = get_args("--mode inference --fp16".split())
68
+ self._args = argparse.Namespace(**vars(args), **get_recipe("none"))
69
+ self._args.img_size = 160
70
+ self._args.only_first_stage = False
71
+ self._args.inverse_prompt = False
72
+ self._args.batch_size = 1
73
+ self._args.max_inference_batch_size = 1
74
+
75
+ # Load the model components
76
+ self._model, self._args = InferenceModel.from_pretrained(self._args, "coglm")
77
+ invalid_slices = [slice(tokenizer.num_image_tokens, None)]
78
+ self._strategy = CoglmStrategy(
79
+ invalid_slices,
80
+ temperature=getattr(self._args, "temp_all_gen"),
81
+ top_k=getattr(self._args, "topk_gen"),
82
+ top_k_cluster=getattr(self._args, "temp_cluster_gen"),
83
+ )
84
+ self._srg = SRGroup(self._args) # type: ignore
85
+
86
+ def _model_inference(self, prompt) -> torch.Tensor:
87
+ try:
88
+ from SwissArmyTransformer.generation.autoregressive_sampling import filling_sequence
89
+ from helm.clients.image_generation.cogview2.coglm_utils import get_masks_and_position_ids_coglm
90
+ except ModuleNotFoundError as e:
91
+ handle_module_not_found_error(e, ["heim"])
92
+
93
+ with torch.no_grad():
94
+ text = getattr(self._args, "query_template").format(prompt)
95
+ seq = tokenizer.encode(text)
96
+ if len(seq) > self.MAX_SEQ_LEN:
97
+ seq = seq[: self.MAX_SEQ_LEN - 2] + seq[-2:]
98
+ txt_len = len(seq) - 1
99
+ device = getattr(self._args, "device")
100
+ seq = torch.tensor(seq + [-1] * 400, device=device)
101
+ # calibrate text length
102
+ log_attention_weights = torch.zeros(
103
+ len(seq), len(seq), device=device, dtype=torch.half if getattr(self._args, "fp16") else torch.float32
104
+ )
105
+ log_attention_weights[:, :txt_len] = getattr(self._args, "attn_plus")
106
+ # generation
107
+ mbz = getattr(self._args, "max_inference_batch_size")
108
+ batch_size = getattr(self._args, "batch_size")
109
+ assert batch_size < mbz or batch_size % mbz == 0
110
+ get_func = partial(get_masks_and_position_ids_coglm, context_length=txt_len)
111
+ output_list = []
112
+ for tim in range(max(batch_size // mbz, 1)):
113
+ setattr(self._strategy, "start_pos", txt_len + 1)
114
+ coarse_samples = filling_sequence(
115
+ self._model,
116
+ seq.clone(),
117
+ batch_size=min(batch_size, mbz),
118
+ strategy=self._strategy,
119
+ log_attention_weights=log_attention_weights,
120
+ get_masks_and_position_ids=get_func,
121
+ )[0]
122
+ output_list.append(coarse_samples)
123
+
124
+ output_tokens = torch.cat(output_list, dim=0)
125
+ images = []
126
+ iter_tokens = getattr(self._srg, "sr_base")(output_tokens[:, -400:], seq[:txt_len])
127
+ for seq in iter_tokens:
128
+ decoded_img = tokenizer.decode(image_ids=seq[-3600:])
129
+ decoded_img = torch.nn.functional.interpolate(decoded_img, size=(480, 480))
130
+ images.append(decoded_img) # only the last image (target)
131
+ return images[0]
132
+
133
+ def make_request(self, request: Request) -> RequestResult:
134
+ raw_request = {
135
+ "prompt": request.prompt,
136
+ }
137
+
138
+ try:
139
+
140
+ def do_it() -> Dict[str, Any]:
141
+ prompt: str = request.prompt
142
+
143
+ with htrack_block(f"Generating images for prompt: {prompt}"):
144
+ self._get_model()
145
+
146
+ images: List[torch.Tensor] = []
147
+ for _ in range(request.num_completions):
148
+ output = self._model_inference(**raw_request).cpu() # (1, 3, 480, 480)
149
+ images.append(output)
150
+
151
+ assert (
152
+ len(images) == request.num_completions
153
+ ), f"Expected {request.num_completions} images, but got {len(images)}"
154
+
155
+ result: Dict = {"file_locations": []}
156
+ for image in images:
157
+ # Write out the image to a file and save the path
158
+ file_location: str = self._file_cache.generate_unique_new_file_path() # type: ignore
159
+ save_image(image, file_location, normalize=True)
160
+ hlog(f"Image saved at {file_location}.")
161
+ result["file_locations"].append(file_location)
162
+ return result
163
+
164
+ # Include the model name and number of completions in the cache key
165
+ cache_key = CachingClient.make_cache_key(
166
+ {"model": request.model_engine, "n": request.num_completions, **raw_request}, request
167
+ )
168
+ results, cached = self._cache.get(cache_key, wrap_request_time(do_it))
169
+ except RuntimeError as e:
170
+ error: str = f"CogView2Client error: {e}"
171
+ return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
172
+
173
+ completions: List[GeneratedOutput] = [
174
+ GeneratedOutput(
175
+ text="", logprob=0, tokens=[], multimodal_content=get_single_image_multimedia_object(location)
176
+ )
177
+ for location in results["file_locations"]
178
+ ]
179
+ return RequestResult(
180
+ success=True,
181
+ cached=cached,
182
+ request_time=results["request_time"],
183
+ completions=completions,
184
+ embedding=[],
185
+ )
186
+
187
+ def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
188
+ raise NotImplementedError("This client does not support tokenizing.")
189
+
190
+ def decode(self, request: DecodeRequest) -> DecodeRequestResult:
191
+ raise NotImplementedError("This client does not support decoding.")
@@ -0,0 +1,192 @@
1
+ from typing import Any, Dict, List, Optional
2
+ import base64
3
+
4
+ from helm.common.cache import CacheConfig, Cache
5
+ from helm.common.general import hlog
6
+ from helm.common.file_caches.file_cache import FileCache
7
+ from helm.common.media_object import MultimediaObject
8
+ from helm.common.optional_dependencies import handle_module_not_found_error
9
+ from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
10
+ from helm.common.tokenization_request import (
11
+ TokenizationRequest,
12
+ TokenizationRequestResult,
13
+ DecodeRequest,
14
+ DecodeRequestResult,
15
+ )
16
+ from helm.clients.moderation_api_client import ModerationAPIClient
17
+ from helm.clients.client import Client, CachingClient
18
+ from .image_generation_client_utils import get_single_image_multimedia_object
19
+
20
+ try:
21
+ import openai
22
+ from openai import OpenAI
23
+ except ModuleNotFoundError as missing_module_exception:
24
+ handle_module_not_found_error(missing_module_exception, ["openai"])
25
+
26
+
27
+ class DALLE2Client(Client):
28
+ MAX_PROMPT_LENGTH: int = 1000
29
+ DEFAULT_IMAGE_SIZE_STR: str = "512x512"
30
+ VALID_IMAGE_SIZES: List[str] = ["256x256", DEFAULT_IMAGE_SIZE_STR, "1024x1024"]
31
+
32
+ # Set the finish reason to this if the prompt violates OpenAI's content policy
33
+ CONTENT_POLICY_VIOLATED_FINISH_REASON: str = (
34
+ "The prompt violates OpenAI's content policy. "
35
+ "See https://labs.openai.com/policies/content-policy for more information."
36
+ )
37
+
38
+ # The DALL-E API will respond with the following error messages (or even a substring of the message)
39
+ # if it has any issues generating images for a particular prompt
40
+ PROMPT_FLAGGED_ERROR: str = (
41
+ "Your request was rejected as a result of our safety system. "
42
+ "Your prompt may contain text that is not allowed by our safety system."
43
+ )
44
+ PROMPT_FLAGGED_ERROR2: str = (
45
+ "Something went wrong with your generation. You may try again or ask for a different prompt"
46
+ )
47
+ PROMPT_FLAGGED_ERROR3: str = (
48
+ "The server had an error while processing your request. Sorry about that! You can retry your request, "
49
+ "or contact us through our help center at help.openai.com if the error persists."
50
+ )
51
+
52
+ def __init__(
53
+ self,
54
+ api_key: str,
55
+ cache_config: CacheConfig,
56
+ file_cache: FileCache,
57
+ moderation_api_client: ModerationAPIClient,
58
+ org_id: Optional[str] = None,
59
+ ):
60
+ self.file_cache: FileCache = file_cache
61
+ self._cache = Cache(cache_config)
62
+
63
+ self.client = OpenAI(api_key=api_key, organization=org_id)
64
+ self.moderation_api_client: ModerationAPIClient = moderation_api_client
65
+
66
+ def get_content_policy_violated_result(self, request: Request) -> RequestResult:
67
+ """
68
+ Return a RequestResult with no images and a finish reason indicating that the prompt / generated images
69
+ violate OpenAI's content policy.
70
+ """
71
+ no_image = GeneratedOutput(
72
+ text="",
73
+ logprob=0,
74
+ tokens=[],
75
+ multimodal_content=MultimediaObject(),
76
+ finish_reason={"reason": self.CONTENT_POLICY_VIOLATED_FINISH_REASON},
77
+ )
78
+ return RequestResult(
79
+ success=True,
80
+ cached=False,
81
+ request_time=0,
82
+ completions=[no_image] * request.num_completions,
83
+ embedding=[],
84
+ )
85
+
86
+ def get_size_str(self, request: Request) -> str:
87
+ """
88
+ Return the size string for the image generation request.
89
+ If the request does not specify a size, return the default size.
90
+ """
91
+ assert request.image_generation_parameters is not None
92
+ w: Optional[int] = request.image_generation_parameters.output_image_width
93
+ h: Optional[int] = request.image_generation_parameters.output_image_height
94
+ if w is None or h is None:
95
+ return self.DEFAULT_IMAGE_SIZE_STR
96
+
97
+ image_dimensions: str = f"{w}x{h}"
98
+ assert image_dimensions in self.VALID_IMAGE_SIZES, f"Valid image sizes are {self.VALID_IMAGE_SIZES}"
99
+ return image_dimensions
100
+
101
+ def fail_if_invalid_request(self, request: Request) -> None:
102
+ """
103
+ Validate the request to ensure it is a valid request for the DALL-E API.
104
+ """
105
+ assert request.image_generation_parameters is not None
106
+ if len(request.prompt) > self.MAX_PROMPT_LENGTH:
107
+ raise ValueError("The maximum length of the prompt is 1000 characters.")
108
+ if request.num_completions < 1 or request.num_completions > 10:
109
+ raise ValueError("`num_completions` must be between 1 and 10.")
110
+
111
+ def handle_openai_error(self, request: Request, error: Exception) -> RequestResult:
112
+ """
113
+ Handle a thrown error from the DALL-E API.
114
+ """
115
+ if (
116
+ str(error) in self.PROMPT_FLAGGED_ERROR
117
+ # Sometimes the DALL-E API will add additional information to the error message.
118
+ or self.PROMPT_FLAGGED_ERROR2 in str(error)
119
+ or self.PROMPT_FLAGGED_ERROR3 in str(error)
120
+ ):
121
+ # Some requests fail even if we check the prompt against the moderation API.
122
+ # For example, "black" in Spanish (negro) causes requests to DALL-E to fail even
123
+ # though the prompt does not get flagged by the Moderation API.
124
+ hlog(f"Failed safety check: {request.prompt}")
125
+ return self.get_content_policy_violated_result(request)
126
+ else:
127
+ return RequestResult(
128
+ success=False, cached=False, error=f"DALL-E error: {error}", completions=[], embedding=[]
129
+ )
130
+
131
+ def generate_with_dalle_api(self, raw_request: Dict[str, Any]) -> Dict:
132
+ """
133
+ Makes a single request to generate the images with the DALL-E API.
134
+ """
135
+ result = self.client.images.generate(**raw_request).model_dump(mode="json")
136
+ assert "data" in result, f"Invalid response: {result} from prompt: {raw_request['prompt']}"
137
+
138
+ for image in result["data"]:
139
+ # Write out the image to a file and save the path
140
+ image["file_path"] = self.file_cache.store(lambda: base64.b64decode(image["b64_json"]))
141
+ # Don't cache contents of `b64_json` as we already have the image stored
142
+ image.pop("b64_json", None)
143
+ return result
144
+
145
+ def make_request(self, request: Request) -> RequestResult:
146
+ self.fail_if_invalid_request(request)
147
+
148
+ # Use the Moderation API to check if the prompt violates OpenAI's content policy before generating images
149
+ if self.moderation_api_client.will_be_flagged(request.prompt):
150
+ return self.get_content_policy_violated_result(request)
151
+
152
+ # https://beta.openai.com/docs/api-reference/images/create#images/create-response_format
153
+ raw_request: Dict[str, Any] = {
154
+ "prompt": request.prompt,
155
+ "n": request.num_completions,
156
+ "size": self.get_size_str(request),
157
+ "response_format": "b64_json", # Always set to b64_json as URLs are only valid for an hour
158
+ }
159
+
160
+ try:
161
+
162
+ def do_it() -> Dict[str, Any]:
163
+ # To maintain backwards compatibility, specify the model in the request but not in the cache key
164
+ return self.generate_with_dalle_api({"model": "dall-e-2", **raw_request})
165
+
166
+ cache_key = CachingClient.make_cache_key(raw_request, request)
167
+ response, cached = self._cache.get(cache_key, wrap_request_time(do_it))
168
+ except openai.OpenAIError as e:
169
+ return self.handle_openai_error(request, e)
170
+
171
+ completions: List[GeneratedOutput] = [
172
+ GeneratedOutput(
173
+ text="",
174
+ logprob=0,
175
+ tokens=[],
176
+ multimodal_content=get_single_image_multimedia_object(generated_image["file_path"]),
177
+ )
178
+ for generated_image in response["data"]
179
+ ]
180
+ return RequestResult(
181
+ success=True,
182
+ cached=cached,
183
+ request_time=response["request_time"],
184
+ completions=completions,
185
+ embedding=[],
186
+ )
187
+
188
+ def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
189
+ raise NotImplementedError("This client does not support tokenizing.")
190
+
191
+ def decode(self, request: DecodeRequest) -> DecodeRequestResult:
192
+ raise NotImplementedError("This client does not support decoding.")
@@ -0,0 +1,108 @@
1
+ from typing import Any, Dict, List, Optional
2
+
3
+ from helm.common.cache import CacheConfig
4
+ from helm.common.file_caches.file_cache import FileCache
5
+ from helm.common.general import singleton
6
+ from helm.common.optional_dependencies import handle_module_not_found_error
7
+ from helm.common.request import Request, RequestResult, GeneratedOutput, wrap_request_time
8
+ from helm.clients.moderation_api_client import ModerationAPIClient
9
+ from helm.clients.client import CachingClient
10
+ from .dalle2_client import DALLE2Client
11
+ from .image_generation_client_utils import get_single_image_multimedia_object
12
+
13
+ try:
14
+ import openai
15
+ except ModuleNotFoundError as missing_module_exception:
16
+ handle_module_not_found_error(missing_module_exception, ["openai"])
17
+
18
+
19
+ class DALLE3Client(DALLE2Client):
20
+ """
21
+ Client for the OpenAI's DALL-E 3 API.
22
+ DALL-E 3 cookbook with explanations for the different parameters:
23
+ https://cookbook.openai.com/articles/what_is_new_with_dalle_3
24
+ """
25
+
26
+ DEFAULT_IMAGE_SIZE_STR: str = "1024x1024"
27
+ VALID_IMAGE_SIZES: List[str] = [DEFAULT_IMAGE_SIZE_STR, "1792x1024", "1024x1792"]
28
+
29
+ def __init__(
30
+ self,
31
+ api_key: str,
32
+ cache_config: CacheConfig,
33
+ file_cache: FileCache,
34
+ moderation_api_client: ModerationAPIClient,
35
+ org_id: Optional[str] = None,
36
+ ):
37
+ super().__init__(api_key, cache_config, file_cache, moderation_api_client, org_id)
38
+
39
+ def make_request(self, request: Request) -> RequestResult:
40
+ self.fail_if_invalid_request(request)
41
+ if self.moderation_api_client.will_be_flagged(request.prompt):
42
+ return self.get_content_policy_violated_result(request)
43
+
44
+ raw_request: Dict[str, Any] = {
45
+ "model": "dall-e-3",
46
+ "prompt": request.prompt,
47
+ "n": 1, # As of December 2023, the DALL-E 3 API only supports a single generated image per request
48
+ "size": self.get_size_str(request),
49
+ "response_format": "b64_json", # Always set to b64_json as URLs are only valid for an hour
50
+ }
51
+
52
+ if request.model_engine == "dall-e-3":
53
+ raw_request["quality"] = "standard"
54
+ raw_request["style"] = "vivid"
55
+ elif request.model_engine == "dall-e-3-natural":
56
+ raw_request["quality"] = "standard"
57
+ raw_request["style"] = "natural"
58
+ elif request.model_engine == "dall-e-3-hd":
59
+ raw_request["quality"] = "hd"
60
+ raw_request["style"] = "vivid"
61
+ elif request.model_engine == "dall-e-3-hd-natural":
62
+ raw_request["quality"] = "hd"
63
+ raw_request["style"] = "natural"
64
+ else:
65
+ raise ValueError(f"Invalid DALL-E 3 model: {request.model_engine}")
66
+
67
+ responses: List[Dict[str, Any]] = []
68
+ all_cached: bool = True
69
+
70
+ # Since the DALL-E 3 API only supports a single generated image, make `request.num_completions` requests
71
+ for completion_index in range(request.num_completions):
72
+ try:
73
+
74
+ def do_it() -> Dict[str, Any]:
75
+ return self.generate_with_dalle_api({**raw_request})
76
+
77
+ cache_key = CachingClient.make_cache_key({"completion_index": completion_index, **raw_request}, request)
78
+ response, cached = self._cache.get(cache_key, wrap_request_time(do_it))
79
+
80
+ responses.append(response)
81
+ all_cached = all_cached and cached
82
+ except openai.OpenAIError as e:
83
+ return self.handle_openai_error(request, e)
84
+
85
+ completions: List[GeneratedOutput] = []
86
+ total_request_time: float = 0
87
+ for response in responses:
88
+ image_response: Dict[str, Any] = singleton(response["data"])
89
+ completions.append(
90
+ GeneratedOutput(
91
+ # From https://cookbook.openai.com/articles/what_is_new_with_dalle_3,
92
+ # "a new feature in the latest DALL·E-3 API is prompt rewriting, where we use
93
+ # GPT-4 to optimize all of your prompts before they’re passed to DALL-E."
94
+ text=image_response["revised_prompt"],
95
+ multimodal_content=get_single_image_multimedia_object(image_response["file_path"]),
96
+ logprob=0,
97
+ tokens=[],
98
+ )
99
+ )
100
+ total_request_time += response["request_time"]
101
+
102
+ return RequestResult(
103
+ success=True,
104
+ cached=all_cached,
105
+ request_time=total_request_time,
106
+ completions=completions,
107
+ embedding=[],
108
+ )
@@ -0,0 +1,3 @@
1
+ __version__ = "0.1.4"
2
+
3
+ from .model import DalleBart, DalleBartProcessor