PyPI - crfm-helm - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (482) hide show

helm/common/cache.py CHANGED Viewed

@@ -1,13 +1,13 @@
 from collections import defaultdict
 from dataclasses import dataclass
-from typing import Dict, Callable, Generator, Optional, Tuple
+from typing import Dict, Callable, Generator, Mapping, Optional, Tuple
 import json
 import threading
 import sqlite3
 from helm.common.general import hlog, htrack
-from helm.common.key_value_store import KeyValueStore, SqliteKeyValueStore
+from helm.common.key_value_store import BlackHoleKeyValueStore, KeyValueStore, SqliteKeyValueStore
 from helm.proxy.retry import get_retry_decorator
 try:
@@ -51,6 +51,16 @@ class SqliteCacheConfig(KeyValueStoreCacheConfig):
         return self.path
+@dataclass(frozen=True)
+class BlackHoleCacheConfig(KeyValueStoreCacheConfig):
+    """Cache configuration for a cache that keeps none of the data given to it."""
+    @property
+    def cache_stats_key(self) -> str:
+        """Return the fixed string under which CacheStats identifies this cache."""
+        stats_key: str = "disabled_cache"
+        return stats_key
 @dataclass(frozen=True)
 class MongoCacheConfig(KeyValueStoreCacheConfig):
     """Configuration for a cache backed by a MongoDB collection."""
@@ -113,12 +123,14 @@ def create_key_value_store(config: KeyValueStoreCacheConfig) -> KeyValueStore:
         return MongoKeyValueStore(config.uri, config.collection_name)
     elif isinstance(config, SqliteCacheConfig):
         return SqliteKeyValueStore(config.path)
+    elif isinstance(config, BlackHoleCacheConfig):
+        return BlackHoleKeyValueStore()
     else:
         raise ValueError(f"KeyValueStoreCacheConfig with unknown type: {config}")
 @retry
-def write_to_key_value_store(key_value_store: KeyValueStore, key: Dict, response: Dict) -> bool:
+def write_to_key_value_store(key_value_store: KeyValueStore, key: Mapping, response: Dict) -> bool:
     """
     Write to the key value store with retry. Returns boolean indicating whether the write was successful or not.
     """
@@ -188,7 +200,7 @@ class Cache(object):
         else:
             raise ValueError(f"CacheConfig with unknown type: {config}")
-    def get(self, request: Dict, compute: Callable[[], Dict]) -> Tuple[Dict, bool]:
+    def get(self, request: Mapping, compute: Callable[[], Dict]) -> Tuple[Dict, bool]:
         """Get the result of `request` (by calling `compute` as needed)."""
         cache_stats.increment_query(self.config.cache_stats_key)

helm/common/cache_backend_config.py ADDED Viewed

@@ -0,0 +1,47 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+import os
+from helm.common.cache import CacheConfig, MongoCacheConfig, BlackHoleCacheConfig, SqliteCacheConfig
+class CacheBackendConfig(ABC):
+    """Abstract description of a cache backend.
+    Concrete subclasses turn a shard name into the `CacheConfig` for that shard."""
+    @abstractmethod
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        """Return the `CacheConfig` to use for the shard called `shard_name`."""
+@dataclass(frozen=True)
+class MongoCacheBackendConfig(CacheBackendConfig):
+    """Cache backend settings for caches kept in MongoDB."""
+    # URL for the MongoDB database that contains the collection.
+    # Example format: mongodb://[username:password@]host1[:port1]/[dbname]
+    # For full format, see: https://www.mongodb.com/docs/manual/reference/connection-string/
+    uri: str
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        """Build a `MongoCacheConfig` whose collection name is `shard_name`."""
+        collection_name: str = shard_name
+        return MongoCacheConfig(uri=self.uri, collection_name=collection_name)
+@dataclass(frozen=True)
+class BlackHoleCacheBackendConfig(CacheBackendConfig):
+    """Cache backend settings for a backend that keeps no data at all."""
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        """Return a `BlackHoleCacheConfig`; `shard_name` is not used."""
+        black_hole_config = BlackHoleCacheConfig()
+        return black_hole_config
+@dataclass(frozen=True)
+class SqliteCacheBackendConfig(CacheBackendConfig):
+    """Cache backend settings for caches kept in Sqlite files."""
+    # Path for the directory that will contain Sqlite files for caches.
+    path: str
+    def get_cache_config(self, shard_name: str) -> CacheConfig:
+        """Build a `SqliteCacheConfig` pointing at `<path>/<shard_name>.sqlite`."""
+        sqlite_file_name: str = f"{shard_name}.sqlite"
+        return SqliteCacheConfig(path=os.path.join(self.path, sqlite_file_name))

helm/common/clip_score_request.py ADDED Viewed

@@ -0,0 +1,41 @@
+from dataclasses import dataclass
+from typing import Optional
+# CLIP model used when a request does not name one explicitly.
+DEFAULT_CLIP_SCORE_MODEL = "openai/clip-vit-large-patch14"
+@dataclass(frozen=True)
+class CLIPScoreRequest:
+    """Request to compute the CLIPScore of one caption against one image."""
+    # The caption whose CLIPScore is computed
+    caption: str
+    # Where the image is located
+    image_location: str
+    # The CLIP model to score with
+    model: str = DEFAULT_CLIP_SCORE_MODEL
+    # Whether to compute the multilingual CLIPScore
+    multilingual: bool = False
+@dataclass(frozen=True)
+class CLIPScoreResult:
+    """Outcome of a `CLIPScoreRequest`."""
+    # True when the request succeeded
+    success: bool
+    # True when the result came from the cache
+    cached: bool
+    # The computed CLIPScore (defaults to 0.0)
+    score: float = 0.0
+    # Error description when `success` is false
+    error: Optional[str] = None

helm/common/file_caches/__init__.py ADDED Viewed

File without changes

helm/common/file_caches/file_cache.py ADDED Viewed

@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import Callable
+class FileCache(ABC):
+    """Abstract cache whose entries are files."""
+    @abstractmethod
+    def store(self, compute: Callable[[], bytes]) -> str:
+        """
+        Persist the bytes produced by `compute` as a file at a unique location
+        and return that location.
+        """

helm/common/file_caches/local_file_cache.py ADDED Viewed

@@ -0,0 +1,61 @@
+import os
+from typing import Callable
+from helm.common.general import ensure_directory_exists, generate_unique_id
+from .file_cache import FileCache
+from helm.common.optional_dependencies import handle_module_not_found_error
+try:
+    from PIL import Image
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["images"])
+class LocalFileCache(FileCache):
+    """File cache that writes every stored result to a new file in a local directory."""
+    def __init__(self, base_path: str, file_extension: str):
+        """
+        :param base_path: Directory that holds the cached files; created if it is missing.
+        :param file_extension: Extension (without the dot) given to every stored file.
+        """
+        ensure_directory_exists(base_path)
+        self._location: str = base_path
+        self._file_extension: str = file_extension
+    def store(self, compute: Callable[[], bytes]) -> str:
+        """
+        Stores the output of `compute` as a file at a unique path.
+        Returns the file path.
+        """
+        # Run `compute` before creating the file, so that a `compute` that raises
+        # does not leave an empty file behind in the cache directory.
+        contents: bytes = compute()
+        file_path: str = self.generate_unique_new_file_path()
+        with open(file_path, "wb") as f:
+            f.write(contents)
+        return file_path
+    def generate_unique_new_file_path(self) -> str:
+        """Generate a unique file path under `base_path` that does not exist yet."""
+        while True:
+            file_name: str = f"{generate_unique_id()}.{self._file_extension}"
+            file_path: str = os.path.join(self._location, file_name)
+            # Retry with a fresh id on the (unlikely) collision with an existing file.
+            if not os.path.exists(file_path):
+                return file_path
+class LocalPILFileCache(LocalFileCache):
+    """`LocalFileCache` for PIL images, stored as files with the "png" extension."""
+    def __init__(self, base_path: str):
+        """:param base_path: Directory that holds the cached image files."""
+        super().__init__(base_path, "png")
+    def store_image(self, compute: Callable[[], Image.Image]) -> str:
+        """
+        Stores the output of `compute` as a file at a unique path.
+        Returns the file path.
+        """
+        file_path: str = self.generate_unique_new_file_path()
+        compute().save(file_path)
+        return file_path
+    def load_image(self, file_path: str) -> Image.Image:
+        """Load the image at `file_path` and return it converted to RGB."""
+        # `convert` returns a new image, so the handle opened by `Image.open`
+        # can be closed as soon as the conversion is done.
+        with Image.open(file_path) as image:
+            return image.convert("RGB")

helm/common/file_caches/test_local_file_cache.py ADDED Viewed

@@ -0,0 +1,25 @@
+import os
+import shutil
+import tempfile
+import unittest
+from .local_file_cache import LocalFileCache
+class TestLocalFileCache(unittest.TestCase):
+    """Tests for `LocalFileCache`."""
+    # setUp/tearDown are the unittest.TestCase fixture hooks, so the temporary
+    # directory is managed under both `python -m unittest` and pytest.
+    def setUp(self):
+        self.path: str = tempfile.mkdtemp()
+    def tearDown(self):
+        shutil.rmtree(self.path)
+    def test_get(self):
+        cache = LocalFileCache(self.path, file_extension="txt")
+        file_path1: str = cache.store(lambda: "hello.".encode())
+        # Verify the contents of the file
+        with open(file_path1, "r", encoding="utf-8") as f:
+            self.assertEqual(f.read(), "hello.")
+        cache.store(lambda: "bye.".encode())
+        # Each call to `store` must create its own file.
+        self.assertEqual(len(os.listdir(self.path)), 2)

helm/common/file_upload_request.py ADDED Viewed

@@ -0,0 +1,27 @@
+from dataclasses import dataclass
+from typing import Optional
+@dataclass(frozen=True)
+class FileUploadRequest:
+    """Request to upload the file found at `path`."""
+    # Path of the file that should be uploaded
+    path: str
+@dataclass(frozen=True)
+class FileUploadResult:
+    """Outcome of a `FileUploadRequest`."""
+    # True when the request succeeded
+    success: bool
+    # True when the result came from the cache
+    cached: bool
+    # URL at which the uploaded file is available
+    url: str
+    # Error description when `success` is false
+    error: Optional[str] = None

helm/common/general.py CHANGED Viewed

@@ -63,7 +63,7 @@ def shell(args: List[str]):
     hlog(f"Executing: {cmd}")
     exit_code = subprocess.call(args)
     if exit_code != 0:
-        hlog(f"Failed with exit code {exit_code}: {cmd}")
+        raise Exception(f"Failed with exit code {exit_code}: {cmd}")
 @htrack(None)

helm/common/image_generation_parameters.py ADDED Viewed

@@ -0,0 +1,25 @@
+from dataclasses import dataclass
+from typing import Optional
+@dataclass(frozen=True)
+class ImageGenerationParameters:
+    """Optional settings that control image generation."""
+    # Width of the generated image. The model will generate images with the model's
+    # default dimensions when unspecified.
+    output_image_width: Optional[int] = None
+    # Height of the generated image. The model will generate images with the model's
+    # default dimensions when unspecified.
+    output_image_height: Optional[int] = None
+    # A non-negative number determining how much importance is given to the prompt
+    # when generating images. Higher values will generate images that follow the
+    # prompt more closely. Currently only for diffusion models.
+    guidance_scale: Optional[float] = None
+    # The number of denoising steps for diffusion models.
+    diffusion_denoising_steps: Optional[int] = None

helm/common/images_utils.py CHANGED Viewed

@@ -2,7 +2,10 @@ import base64
 import io
 import requests
 import shutil
-from typing import Optional
+from typing import List, Optional
+from urllib.request import urlopen
+import numpy as np
 from .general import is_url
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -45,3 +48,23 @@ def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optiona
         image.save(dest)
     else:
         shutil.copy(src, dest)
+def is_blacked_out_image(image_location: str) -> bool:
+    """Returns True if the image is all black. False otherwise.
+    :param image_location: Local path or URL of the image to inspect.
+    :raises ValueError: If the image cannot be read or decoded.
+    """
+    try:
+        import cv2
+    except ModuleNotFoundError as e:
+        handle_module_not_found_error(e, ["heim"])
+    if is_url(image_location):
+        # Close the HTTP response as soon as its bytes have been read.
+        with urlopen(image_location) as response:
+            arr = np.frombuffer(response.read(), dtype=np.uint8)
+        # Decode as single-channel grayscale, like the local-file branch below:
+        # cv2.countNonZero only accepts single-channel arrays, so decoding a color
+        # image unchanged would make the call fail.
+        image = cv2.imdecode(arr, cv2.IMREAD_GRAYSCALE)
+    else:
+        image = cv2.imread(image_location, cv2.IMREAD_GRAYSCALE)
+    # Both OpenCV readers signal failure by returning None instead of raising.
+    if image is None:
+        raise ValueError(f"Could not read image at {image_location}")
+    return cv2.countNonZero(image) == 0
+def filter_blacked_out_images(image_locations: List[str]) -> List[str]:
+    """Return the image locations, in their original order, whose image is not all black."""
+    kept_locations: List[str] = []
+    for candidate in image_locations:
+        if is_blacked_out_image(candidate):
+            continue
+        kept_locations.append(candidate)
+    return kept_locations

helm/common/key_value_store.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from abc import abstractmethod
 import contextlib
 import json
-from typing import Dict, Generator, Iterable, Optional, Tuple
+from typing import Dict, Generator, Iterable, Mapping, Optional, Tuple
 from sqlitedict import SqliteDict
-def request_to_key(request: Dict) -> str:
+def request_to_key(request: Mapping) -> str:
     """Normalize a `request` into a `key` so that we can hash using it."""
     return json.dumps(request, sort_keys=True)
@@ -27,7 +27,7 @@ class KeyValueStore(contextlib.AbstractContextManager):
         pass
     @abstractmethod
-    def put(self, key: Dict, value: Dict) -> None:
+    def put(self, key: Mapping, value: Dict) -> None:
         pass
     @abstractmethod
@@ -68,7 +68,7 @@ class SqliteKeyValueStore(KeyValueStore):
         for key, value in self._sqlite_dict.items():
             yield (key, value)
-    def put(self, key: Dict, value: Dict) -> None:
+    def put(self, key: Mapping, value: Dict) -> None:
         key_string = request_to_key(key)
         self._sqlite_dict[key_string] = value
         self._sqlite_dict.commit()
@@ -80,3 +80,34 @@ class SqliteKeyValueStore(KeyValueStore):
     def remove(self, key: Dict) -> None:
         del self._sqlite_dict[key]
         self._sqlite_dict.commit()
+class BlackHoleKeyValueStore(KeyValueStore):
+    """Key value store that keeps nothing: writes are dropped and reads find no entries."""
+    def __enter__(self) -> "BlackHoleKeyValueStore":
+        return self
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        # Nothing to release; returning None lets any exception propagate.
+        return None
+    def contains(self, key: Dict) -> bool:
+        """Always False: no key is ever stored."""
+        return False
+    def get(self, key: Dict) -> Optional[Dict]:
+        """Always None: no value is ever stored."""
+        return None
+    def get_all(self) -> Generator[Tuple[Dict, Dict], None, None]:
+        """Yield nothing: the store is always empty."""
+        yield from ()
+    def put(self, key: Mapping, value: Dict) -> None:
+        """Discard the pair."""
+        pass
+    def multi_put(self, pairs: Iterable[Tuple[Dict, Dict]]) -> None:
+        """Discard all pairs without iterating over them."""
+        pass
+    def remove(self, key: Dict) -> None:
+        """Do nothing: there is never anything to remove."""
+        pass

helm/common/media_object.py CHANGED Viewed

@@ -5,6 +5,7 @@ from dataclasses import dataclass, field, replace
 from typing import List, Optional
+IMAGE_TYPE = "image"
 TEXT_TYPE = "text"
@@ -26,6 +27,10 @@ class MediaObject:
     location: Optional[str] = None
     """When the media object is a file, specify the location of the media object, which can be a local path or URL."""
+    def to_dict(self) -> dict:
+        """Return this object's instance attributes as a dictionary, leaving out those that are None."""
+        result: dict = {}
+        for name, attribute in self.__dict__.items():
+            if attribute is not None:
+                result[name] = attribute
+        return result
     @property
     def type(self) -> str:
         """The MIME type of the media object."""
@@ -113,6 +118,14 @@ class MultimediaObject:
         """
         return MultimediaObject(media_objects=self.media_objects + other.media_objects)
+    @property
+    def size(self) -> int:
+        """
+        Number of `MediaObject`s held by this multimodal content.
+        :return: The length of `media_objects`.
+        """
+        media_object_count: int = len(self.media_objects)
+        return media_object_count
     @property
     def text(self) -> str:
         """

helm/common/moderations_api_request.py ADDED Viewed

@@ -0,0 +1,71 @@
+from dataclasses import dataclass
+from typing import Optional
+@dataclass(frozen=True)
+class ModerationAPIRequest:
+    """Request to check a piece of text against OpenAI's content policy."""
+    # The text to check
+    text: str
+    # Whether to use the latest moderation model.
+    # From https://beta.openai.com/docs/api-reference/moderations/create,
+    # "the default is text-moderation-latest which will be automatically upgraded over time.
+    # This ensures you are always using our most accurate model. If you use text-moderation-stable,
+    # we will provide advanced notice before updating the model. Accuracy of text-moderation-stable
+    # may be slightly lower than for text-moderation-latest."
+    use_latest_model: bool = False
+@dataclass(frozen=True)
+class ModerationCategoryFlaggedResults:
+    """
+    One boolean content-violation flag per moderation category.
+    The categories are described at https://beta.openai.com/docs/guides/moderation/overview.
+    """
+    hate_flagged: bool
+    hate_threatening_flagged: bool
+    self_harm_flagged: bool
+    sexual_flagged: bool
+    sexual_minors_flagged: bool
+    violence_flagged: bool
+    violence_graphic_flagged: bool
+@dataclass(frozen=True)
+class ModerationCategoryScores:
+    """
+    One score per moderation category. Each value lies between 0 and 1, and a higher value
+    denotes higher confidence. The scores should not be interpreted as probabilities.
+    The categories are described at https://beta.openai.com/docs/guides/moderation/overview.
+    """
+    hate_score: float
+    hate_threatening_score: float
+    self_harm_score: float
+    sexual_score: float
+    sexual_minors_score: float
+    violence_score: float
+    violence_graphic_score: float
+@dataclass(frozen=True)
+class ModerationAPIRequestResult:
+    """Outcome of a `ModerationAPIRequest`."""
+    # True when the request succeeded
+    success: bool
+    # True when the result came from the cache
+    cached: bool
+    # True if the model classifies the content as violating OpenAI's content policy, False otherwise
+    flagged: Optional[bool]
+    # Per-category flags
+    flagged_results: Optional[ModerationCategoryFlaggedResults]
+    # Per-category scores
+    scores: Optional[ModerationCategoryScores]
+    # Error description when `success` is false
+    error: Optional[str] = None

helm/common/mongo_key_value_store.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
-from typing import Dict, Generator, Iterable, Optional, Tuple
+from typing import Dict, Generator, Iterable, Mapping, Optional, Tuple
 from helm.common.key_value_store import KeyValueStore
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -35,7 +35,7 @@ class MongoKeyValueStore(KeyValueStore):
     def __exit__(self, exc_type, exc_value, traceback) -> None:
         return
-    def _canonicalize_key(self, key: Dict) -> SON:
+    def _canonicalize_key(self, key: Mapping) -> SON:
         serialized = json.dumps(key, sort_keys=True)
         return json.loads(serialized, object_pairs_hook=SON)
@@ -63,7 +63,7 @@ class MongoKeyValueStore(KeyValueStore):
             else:
                 yield (request, response)
-    def put(self, key: Dict, value: Dict) -> None:
+    def put(self, key: Mapping, value: Dict) -> None:
         request = self._canonicalize_key(key)
         document = SON([(self._REQUEST_KEY, request), (self._RESPONSE_KEY, value)])
         # The MongoDB collection should have a unique indexed on "request"

helm/common/multimodal_request_utils.py ADDED Viewed

@@ -0,0 +1,31 @@
+from typing import List, Optional
+from helm.benchmark.adaptation.request_state import RequestState
+from helm.benchmark.scenarios.scenario import Reference
+from helm.common.request import RequestResult
+def gather_generated_image_locations(request_result: RequestResult) -> List[str]:
+    """Collect the locations (file paths or URLs) of the generated images, one per completion.
+    Only the first media object of each completion is considered, and completions whose
+    first media object has no location are skipped.
+    """
+    gathered: List[str] = []
+    for completion in request_result.completions:
+        content = completion.multimodal_content
+        # Models like DALL-E 2 can skip generating images for prompts that violate their content policy.
+        # A single completion without content makes the whole result empty, discarding
+        # any locations gathered so far.
+        if content is None or content.size == 0:
+            return []
+        first_location: Optional[str] = content.media_objects[0].location
+        if first_location is None:
+            continue
+        gathered.append(first_location)
+    return gathered
+def get_gold_image_location(request_state: RequestState) -> str:
+    """Return the location of the first media object of the first reference (the gold image).
+    Asserts that the instance has a first reference with non-empty multimedia content
+    whose first media object has a location.
+    """
+    error_message: str = "Expected at least one gold image"
+    references: List[Reference] = request_state.instance.references
+    assert len(references) > 0, error_message
+    gold_content = references[0].output.multimedia_content
+    assert (
+        gold_content is not None and gold_content.size > 0 and gold_content.media_objects[0].location is not None
+    ), error_message
+    return gold_content.media_objects[0].location

helm/common/nudity_check_request.py ADDED Viewed

@@ -0,0 +1,29 @@
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict
+@dataclass(frozen=True)
+class NudityCheckRequest:
+    """Request to check a batch of images for nudity."""
+    # Locations of the images in the batch (empty by default)
+    image_locations: List[str] = field(default_factory=list)
+@dataclass(frozen=True)
+class NudityCheckResult:
+    """Outcome of a `NudityCheckRequest`."""
+    # True when the request succeeded
+    success: bool
+    # True when the result came from the cache
+    cached: bool
+    # Maps each image to its nudity result; True means that image contains nudity.
+    image_to_nudity: Dict[str, bool] = field(default_factory=dict)
+    # Error description when `success` is false
+    error: Optional[str] = None

crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl