PyPI - crfm-helm - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (499) hide show

helm/common/media_object.py CHANGED Viewed

@@ -5,6 +5,7 @@ from dataclasses import dataclass, field, replace
 from typing import List, Optional
+IMAGE_TYPE = "image"
 TEXT_TYPE = "text"
@@ -26,6 +27,10 @@ class MediaObject:
     location: Optional[str] = None
     """When the media object is a file, specify the location of the media object, which can be a local path or URL."""
+    def to_dict(self) -> dict:
+        """Converts the media object to a dictionary."""
+        return {key: value for key, value in self.__dict__.items() if value is not None}
     @property
     def type(self) -> str:
         """The MIME type of the media object."""
@@ -113,6 +118,14 @@ class MultimediaObject:
         """
         return MultimediaObject(media_objects=self.media_objects + other.media_objects)
+    @property
+    def size(self) -> int:
+        """
+        Get the number of `MediaObject`s in this multimodal content.
+        :return: The number of `MediaObject`s .
+        """
+        return len(self.media_objects)
     @property
     def text(self) -> str:
         """

helm/common/moderations_api_request.py ADDED Viewed

@@ -0,0 +1,71 @@
+from dataclasses import dataclass
+from typing import Optional
+@dataclass(frozen=True)
+class ModerationAPIRequest:
+    # Text to check against OpenAI's content policy
+    text: str
+    # From https://beta.openai.com/docs/api-reference/moderations/create,
+    # "the default is text-moderation-latest which will be automatically upgraded over time.
+    # This ensures you are always using our most accurate model. If you use text-moderation-stable,
+    # we will provide advanced notice before updating the model. Accuracy of text-moderation-stable
+    # may be slightly lower than for text-moderation-latest."
+    use_latest_model: bool = False
+@dataclass(frozen=True)
+class ModerationCategoryFlaggedResults:
+    """
+    Contains per-category binary content violation flags.
+    For descriptions of the categories, see https://beta.openai.com/docs/guides/moderation/overview.
+    """
+    hate_flagged: bool
+    hate_threatening_flagged: bool
+    self_harm_flagged: bool
+    sexual_flagged: bool
+    sexual_minors_flagged: bool
+    violence_flagged: bool
+    violence_graphic_flagged: bool
+@dataclass(frozen=True)
+class ModerationCategoryScores:
+    """
+    Contains per-category scores. Values are between 0 and 1, where higher values denote higher
+    confidence. The scores should not be interpreted as probabilities.
+    For descriptions of the categories, see https://beta.openai.com/docs/guides/moderation/overview.
+    """
+    hate_score: float
+    hate_threatening_score: float
+    self_harm_score: float
+    sexual_score: float
+    sexual_minors_score: float
+    violence_score: float
+    violence_graphic_score: float
+@dataclass(frozen=True)
+class ModerationAPIRequestResult:
+    """Result after sending a `ModerationAPIRequest`."""
+    # Whether the request was successful
+    success: bool
+    # Whether the request was cached
+    cached: bool
+    # True if the model classifies the content as violating OpenAI's content policy, False otherwise
+    flagged: Optional[bool]
+    # Flagged results
+    flagged_results: Optional[ModerationCategoryFlaggedResults]
+    # Score results
+    scores: Optional[ModerationCategoryScores]
+    # If `success` is false, what was the error?
+    error: Optional[str] = None

helm/common/mongo_key_value_store.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import json
-from typing import Dict, Generator, Iterable, Optional, Tuple
+from typing import Dict, Generator, Iterable, Mapping, Optional, Tuple
 from helm.common.key_value_store import KeyValueStore
 from helm.common.optional_dependencies import handle_module_not_found_error
@@ -35,7 +35,7 @@ class MongoKeyValueStore(KeyValueStore):
     def __exit__(self, exc_type, exc_value, traceback) -> None:
         return
-    def _canonicalize_key(self, key: Dict) -> SON:
+    def _canonicalize_key(self, key: Mapping) -> SON:
         serialized = json.dumps(key, sort_keys=True)
         return json.loads(serialized, object_pairs_hook=SON)
@@ -63,7 +63,7 @@ class MongoKeyValueStore(KeyValueStore):
             else:
                 yield (request, response)
-    def put(self, key: Dict, value: Dict) -> None:
+    def put(self, key: Mapping, value: Dict) -> None:
         request = self._canonicalize_key(key)
         document = SON([(self._REQUEST_KEY, request), (self._RESPONSE_KEY, value)])
         # The MongoDB collection should have a unique indexed on "request"

helm/common/multimodal_request_utils.py ADDED Viewed

@@ -0,0 +1,31 @@
+from typing import List, Optional
+from helm.benchmark.adaptation.request_state import RequestState
+from helm.benchmark.scenarios.scenario import Reference
+from helm.common.request import RequestResult
+def gather_generated_image_locations(request_result: RequestResult) -> List[str]:
+    """Gathers the locations (file paths or URLs) of the generated images."""
+    image_locations: List[str] = []
+    for image in request_result.completions:
+        # Models like DALL-E 2 can skip generating images for prompts that violate their content policy
+        if image.multimodal_content is None or image.multimodal_content.size == 0:
+            return []
+        location: Optional[str] = image.multimodal_content.media_objects[0].location
+        if location is not None:
+            image_locations.append(location)
+    return image_locations
+def get_gold_image_location(request_state: RequestState) -> str:
+    """Returns the first gold image location."""
+    references: List[Reference] = request_state.instance.references
+    assert (
+        len(references) > 0
+        and references[0].output.multimedia_content is not None
+        and references[0].output.multimedia_content.size > 0
+        and references[0].output.multimedia_content.media_objects[0].location is not None
+    ), "Expected at least one gold image"
+    return references[0].output.multimedia_content.media_objects[0].location

helm/common/nudity_check_request.py ADDED Viewed

@@ -0,0 +1,29 @@
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict
+@dataclass(frozen=True)
+class NudityCheckRequest:
+    """
+    Checks for nudity for a given set of images.
+    """
+    # Batch of images
+    image_locations: List[str] = field(default_factory=list)
+@dataclass(frozen=True)
+class NudityCheckResult:
+    """Result after sending a `NudityCheckRequest`."""
+    # Whether the request was successful
+    success: bool
+    # Whether the request was cached
+    cached: bool
+    # Nudity results. True indicates the particular image contains nudity.
+    image_to_nudity: Dict[str, bool] = field(default_factory=dict)
+    # If `success` is false, what was the error?
+    error: Optional[str] = None

helm/common/request.py CHANGED Viewed

@@ -3,6 +3,7 @@ from dataclasses import dataclass, field
 from typing import Any, Callable, Dict, List, Optional
 from helm.common.media_object import MultimediaObject
+from helm.common.image_generation_parameters import ImageGenerationParameters
 from .general import indent_lines, format_text
@@ -68,6 +69,9 @@ class Request:
     multimodal_prompt: Optional[MultimediaObject] = None
     """Multimodal prompt with media objects interleaved (e.g., text, video, image, text, ...)"""
+    image_generation_parameters: Optional[ImageGenerationParameters] = None
+    """Parameters for image generation."""
     @property
     def model_host(self) -> str:
         """Returns the model host (referring to the deployment).
@@ -93,8 +97,6 @@ class Token:
     """
     A `Token` represents one token position in a `Sequence`, which has the
     chosen `text` as well as the top probabilities under the model.
-    Note: (text, logprob) could exist or not exist in `top_logprobs`.
     """
     # Text that was chosen
@@ -103,22 +105,15 @@ class Token:
     # Log probability of generating that
     logprob: float
-    # text -> log probability of generating that
-    top_logprobs: Dict[str, float]
     def render_lines(self) -> List[str]:
-        top_logprobs_entries = sorted(self.top_logprobs.items(), key=lambda entry: -entry[1])
-        top_logprobs_str = (
-            "{" + ", ".join(f"{format_text(text)}: {logprob}" for text, logprob in top_logprobs_entries) + "}"
-        )
         return [
-            f"{format_text(self.text)} logprob={self.logprob} top_logprobs={top_logprobs_str}",
+            f"{format_text(self.text)} logprob={self.logprob}",
         ]
 @dataclass(frozen=True)
-class Sequence:
-    """A `Sequence` is a sequence of tokens."""
+class GeneratedOutput:
+    """A `GeneratedOutput` is a single generated output that may contain text or multimodal content."""
     # The concatenation of all the tokens
     text: str
@@ -130,10 +125,13 @@ class Sequence:
     tokens: List[Token]
     # Why did the sequence finish?
-    finish_reason: Optional[Dict] = None
+    finish_reason: Optional[Dict[str, Any]] = None
+    # Could be a sequence made up of multimedia content
+    multimodal_content: Optional[MultimediaObject] = None
-    def __add__(self, other: "Sequence") -> "Sequence":
-        return Sequence(self.text + other.text, self.logprob + other.logprob, self.tokens + other.tokens)
+    def __add__(self, other: "GeneratedOutput") -> "GeneratedOutput":
+        return GeneratedOutput(self.text + other.text, self.logprob + other.logprob, self.tokens + other.tokens)
     def render_lines(self) -> List[str]:
         result = [
@@ -172,7 +170,7 @@ class RequestResult:
     embedding: List[float]
     """Fixed dimensional embedding corresponding to the entire prompt"""
-    completions: List[Sequence]
+    completions: List[GeneratedOutput]
     """List of completion"""
     cached: bool
@@ -227,7 +225,7 @@ EMBEDDING_UNAVAILABLE_REQUEST_RESULT = RequestResult(
 )
-def wrap_request_time(compute: Callable[[], Dict[str, Any]]) -> Callable[[], Any]:
+def wrap_request_time(compute: Callable[[], Dict[str, Any]]) -> Callable[[], Dict[str, Any]]:
     """Return a version of `compute` that puts `request_time` into its output."""
     def wrapped_compute():

helm/common/test_general.py CHANGED Viewed

@@ -7,6 +7,7 @@ from helm.common.general import (
     format_split,
     get_file_name,
     unique_simplification,
+    is_url,
 )
@@ -58,3 +59,8 @@ def test_unique_simplification():
 def test_get_file_name():
     assert get_file_name("/path/to/image.png") == "image.png"
+def test_is_url():
+    assert is_url("https://crfm.stanford.edu")
+    assert not is_url("/some/path")

helm/common/tokenization_request.py CHANGED Viewed

@@ -106,7 +106,7 @@ class DecodeRequest:
     tokens: List[int]
     # Which tokenizer we should use
-    tokenizer: str = "huggingface/gpt2"
+    tokenizer: str
     # Whether to clean up the tokenization spaces. Setting to False preserves the original text.
     clean_up_tokenization_spaces: bool = False

crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl