PyPI - crfm-helm - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (546) hide show

helm/benchmark/augmentations/person_name_perturbation.py CHANGED Viewed

@@ -263,13 +263,6 @@ class PersonNamePerturbation(Perturbation):
         name = rng.choice(list(options))
         return name
-    def perturb(self, text: str, rng: Random) -> str:
-        """
-        Perturbing the text is handled in `perturb_with_persistency` to ensure that perturbed names
-        in `Instance`s and `Reference`s match.
-        """
-        raise NotImplementedError("Should never be called")
     def perturb_with_persistency(
         self, text: str, rng: Random, name_substitution_mapping: Dict[str, str], skipped_tokens: Set[str]
     ) -> str:

helm/benchmark/augmentations/perturbation.py CHANGED Viewed

@@ -10,7 +10,6 @@ from helm.common.object_spec import ObjectSpec, create_object
 class Perturbation(ABC):
     # Unique name to describe perturbation
     name: str
@@ -28,17 +27,24 @@ class Perturbation(ABC):
         # If seed exists, use it as part of the random seed
         return Random(instance.id if seed is None else str(seed) + instance.id)
+    @abstractmethod
+    def apply(self, instance: Instance, seed: Optional[int] = None) -> Instance:
+        """Generate a modified instance from the input instance."""
+        pass
+class TextPerturbation(Perturbation, ABC):
     def apply(self, instance: Instance, seed: Optional[int] = None) -> Instance:
         """
-        Generates a new Instance by perturbing the input, tagging the Instance and perturbing the References,
-        if should_perturb_references is true. Initializes a random number generator based on instance_id that gets
-        passed to perturb and perturb_references.
+        Generates a new Instance by applying `perturb` to the input and (if requested) the references.
+        Initializes a random number generator based on instance_id that gets
+        passed to perturb.
         """
         rng: Random = self.get_rng(instance, seed)
         references: List[Reference] = instance.references
         if self.should_perturb_references:
-            references = [self.perturb_reference(reference, rng) for reference in references]
+            references = [self._perturb_reference(reference, rng) for reference in references]
         description = replace(self.description, seed=seed)
@@ -49,11 +55,18 @@ class Perturbation(ABC):
             input=Input(text=self.perturb(instance.input.text, rng)),
             references=references,
             perturbation=description,
+            contrast_inputs=[instance.input],
         )
-    def perturb_reference(self, reference: Reference, rng: Random) -> Reference:
+    def _perturb_reference(self, reference: Reference, rng: Random) -> Reference:
         """Generates a new Reference by perturbing the output and tagging the Reference."""
-        return replace(reference, output=Output(text=self.perturb(reference.output.text, rng)), tags=reference.tags)
+        return replace(
+            reference,
+            output=Output(
+                text=self.perturb(reference.output.text, rng), multimedia_content=reference.output.multimedia_content
+            ),
+            tags=reference.tags,
+        )
     @abstractmethod
     def perturb(self, text: str, rng: Random) -> str:

helm/benchmark/augmentations/perturbation_description.py CHANGED Viewed

@@ -23,7 +23,7 @@ class PerturbationDescription:
     computed_on: str = PERTURBATION_PERTURBED
     """Which types of Instances we are evaluating, to be populated during metric evaluation. PERTURBATION_PERTURBED
     (default) means we are evaluating on perturbed instances, PERTURBATION_ORIGINAL means we are evaluating the
-    unperturbed version of instances where this perturbation appplies, and, PERTURBATION_WORST means the the minimum
+    unperturbed version of instances where this perturbation applies, and, PERTURBATION_WORST means the the minimum
     metric between the two."""
     seed: Optional[int] = None

helm/benchmark/augmentations/space_perturbation.py CHANGED Viewed

@@ -2,11 +2,11 @@ from dataclasses import dataclass
 from random import Random
 import re
-from .perturbation import Perturbation
+from .perturbation import TextPerturbation
 from .perturbation_description import PerturbationDescription
-class SpacePerturbation(Perturbation):
+class SpacePerturbation(TextPerturbation):
     """
     A simple perturbation that replaces existing spaces with 0-max_spaces spaces (thus potentially merging words).
     """

helm/benchmark/augmentations/suffix_perturbation.py ADDED Viewed

@@ -0,0 +1,29 @@
+from dataclasses import dataclass
+from random import Random
+from .perturbation import TextPerturbation
+from .perturbation_description import PerturbationDescription
+class SuffixPerturbation(TextPerturbation):
+    """
+    Appends a suffix to the end of the text. Example:
+    A picture of a dog -> A picture of a dog, picasso
+    """
+    @dataclass(frozen=True)
+    class Description(PerturbationDescription):
+        suffix: str = ""
+    name: str = "style"
+    def __init__(self, suffix: str):
+        self._suffix: str = suffix
+    @property
+    def description(self) -> PerturbationDescription:
+        return SuffixPerturbation.Description(name=self.name, suffix=self._suffix)
+    def perturb(self, text: str, rng: Random) -> str:
+        return f"{text}, {self._suffix}"

helm/benchmark/augmentations/synonym_perturbation.py CHANGED Viewed

@@ -11,10 +11,10 @@ import spacy
 from helm.common.general import match_case, ensure_file_downloaded
 from .perturbation_description import PerturbationDescription
-from .perturbation import Perturbation
+from .perturbation import TextPerturbation
-class SynonymPerturbation(Perturbation):
+class SynonymPerturbation(TextPerturbation):
     """
     Synonyms. For implementation details, see
     https://github.com/GEM-benchmark/NL-Augmenter/blob/main/nlaugmenter/transformations/synonym_substitution/transformation.py

helm/benchmark/augmentations/test_perturbation.py CHANGED Viewed

@@ -15,6 +15,7 @@ from .space_perturbation import SpacePerturbation
 from .dialect_perturbation import DialectPerturbation
 from .person_name_perturbation import PersonNamePerturbation
 from .gender_perturbation import GenderPerturbation
+from .suffix_perturbation import SuffixPerturbation
 def test_extra_space_perturbation():
@@ -145,7 +146,6 @@ def test_space_perturbation():
     instance: Instance = Instance(id="id0", input=Input(text="Hello World!\nQuite a day, huh?"), references=[])
     instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
-    print(instances)
     assert len(instances) == 2
     assert instances[1].perturbation.name == "space"
     assert instances[1].input.text == "Hello   World!\nQuite a  day,   huh?"
@@ -162,7 +162,6 @@ def test_dialect_perturbation():
     )
     instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
-    print(instances)
     assert len(instances) == 2
     assert instances[1].perturbation.name == "dialect"
     assert instances[1].input.text == "I gon remember dis day to b the best day of mah life."
@@ -188,7 +187,6 @@ def test_person_name_perturbation():
     )
     instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
-    print(instances)
     assert len(instances) == 2
     assert instances[1].perturbation.name == "person_name"
     assert (
@@ -209,7 +207,6 @@ def test_gender_pronoun_perturbation():
     )
     instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
-    print(instances)
     assert len(instances) == 2
     assert instances[1].perturbation.mode == "pronouns"
     assert instances[1].input.text == "Did she mention that she was coming with her parents and their friends?"
@@ -227,13 +224,22 @@ def test_gender_term_perturbation():
     )
     instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
-    print(instances)
     assert len(instances) == 2
     assert instances[1].perturbation.mode == "terms"
     assert instances[1].input.text == "His granddaughters looked a lot like their mom."
     assert instances[1].references[0].output.text == "How did their mother look like?"
+def test_suffix_perturbation():
+    data_augmenter = DataAugmenter(perturbations=[SuffixPerturbation(suffix="pixel art")])
+    instance: Instance = Instance(id="id0", input=Input(text="A blue dog"), references=[])
+    instances: List[Instance] = data_augmenter.generate([instance], include_original=True)
+    assert len(instances) == 2
+    assert instances[1].perturbation.suffix == "pixel art"
+    assert instances[1].input.text == "A blue dog, pixel art"
 # TODO(#1958) Fix the logic to renable this test
 @unittest.skip("Currently cannot replace words at either text boundary.")
 def test_gender_term_perturbation_edge_word():
@@ -247,7 +253,6 @@ def test_gender_term_perturbation_edge_word():
     )
     instances: List[Instance] = data_augmenter.generate([instance], include_original=False)
-    print(instances)
     assert len(instances) == 1
     assert instances[0].input.text == "mom said it is okay"
     assert instances[0].references[0].output.text == "Sure he did daughter"
@@ -266,6 +271,5 @@ def test_gender_term_perturbation_consequtive_words():
     )
     instances: List[Instance] = data_augmenter.generate([instance], include_original=False)
-    print(instances)
     assert len(instances) == 1
     assert instances[0].input.text == "I'm a mom mom: my daughter has a daughter."

helm/benchmark/augmentations/translate_perturbation.py ADDED Viewed

@@ -0,0 +1,30 @@
+from dataclasses import dataclass
+from random import Random
+from helm.clients.google_translate_client import GoogleTranslateClient
+from .perturbation import TextPerturbation
+from .perturbation_description import PerturbationDescription
+class TranslatePerturbation(TextPerturbation):
+    """
+    Translates to different languages.
+    """
+    @dataclass(frozen=True)
+    class Description(PerturbationDescription):
+        # Language code to translate to. Needs a default value since we inherit from `PerturbationDescription`
+        language_code: str = "zh-CN"
+    name: str = "translate"
+    def __init__(self, language_code: str):
+        self.language_code: str = language_code
+        self.google_translate_client = GoogleTranslateClient()
+    @property
+    def description(self) -> PerturbationDescription:
+        return TranslatePerturbation.Description(name=self.name, language_code=self.language_code)
+    def perturb(self, text: str, rng: Random) -> str:
+        return self.google_translate_client.translate(text, self.language_code)

helm/benchmark/augmentations/typos_perturbation.py CHANGED Viewed

@@ -2,10 +2,10 @@ from dataclasses import dataclass
 from random import Random
 from .perturbation_description import PerturbationDescription
-from .perturbation import Perturbation
+from .perturbation import TextPerturbation
-class TyposPerturbation(Perturbation):
+class TyposPerturbation(TextPerturbation):
     """
     Typos. For implementation details, see
     https://github.com/GEM-benchmark/NL-Augmenter/tree/main/transformations/butter_fingers_perturbation

helm/benchmark/config_registry.py ADDED Viewed

@@ -0,0 +1,38 @@
+import os
+import importlib_resources as resources
+from helm.benchmark.model_deployment_registry import register_model_deployments_from_path
+from helm.benchmark.model_metadata_registry import register_model_metadata_from_path
+from helm.benchmark.tokenizer_config_registry import register_tokenizer_configs_from_path
+from helm.benchmark.runner_config_registry import register_runner_config_from_path
+MODEL_METADATA_FILE: str = "model_metadata.yaml"
+TOKENIZER_CONFIGS_FILE: str = "tokenizer_configs.yaml"
+MODEL_DEPLOYMENTS_FILE: str = "model_deployments.yaml"
+RUNNER_CONFIG_FILE: str = "runner_config.yaml"
+CONFIG_PACKAGE = "helm.config"
+def register_configs_from_directory(dir_path: str) -> None:
+    model_metadata_path = os.path.join(dir_path, MODEL_METADATA_FILE)
+    if os.path.isfile(model_metadata_path):
+        register_model_metadata_from_path(model_metadata_path)
+    tokenizer_configs_path = os.path.join(dir_path, TOKENIZER_CONFIGS_FILE)
+    if os.path.isfile(tokenizer_configs_path):
+        register_tokenizer_configs_from_path(tokenizer_configs_path)
+    model_deployments_path = os.path.join(dir_path, MODEL_DEPLOYMENTS_FILE)
+    if os.path.isfile(model_deployments_path):
+        register_model_deployments_from_path(model_deployments_path)
+    runner_config_path = os.path.join(dir_path, RUNNER_CONFIG_FILE)
+    if os.path.isfile(runner_config_path):
+        register_runner_config_from_path(runner_config_path)
+def register_builtin_configs_from_helm_package() -> None:
+    package_path = str(resources.files(CONFIG_PACKAGE))
+    register_configs_from_directory(package_path)

helm/benchmark/executor.py CHANGED Viewed

@@ -1,9 +1,15 @@
 from typing import Optional
 from dataclasses import dataclass, replace
+from helm.common.cache_backend_config import (
+    CacheBackendConfig,
+    BlackHoleCacheBackendConfig,
+    MongoCacheBackendConfig,
+    SqliteCacheBackendConfig,
+)
 from helm.common.general import parallel_map
 from helm.common.hierarchical_logger import htrack, hlog
-from helm.common.request import RequestResult, Sequence
+from helm.common.request import RequestResult, GeneratedOutput
 from helm.common.authentication import Authentication
 from helm.proxy.services.remote_service import RemoteService
 from helm.proxy.services.server_service import ServerService
@@ -18,28 +24,36 @@ class ExecutorError(Exception):
 @dataclass(frozen=True)
 class ExecutionSpec:
-    # If non-empty, URL of the proxy server we send requests to (e.g., http://localhost:1959).
     url: Optional[str]
+    """If non-empty, URL of the proxy server we send requests to (e.g., http://localhost:1959)."""
-    # Pass into the service
     auth: Authentication
+    """Authentication that will be passed into the local service, if using the local service."""
-    # Path where API credentials and cache is stored.
-    # This path is the same as `--base-path` when launching the proxy server (see server.py).
-    # Required when url is not set.
     local_path: Optional[str]
+    """Path where API credentials and cache is stored.
+    This path is the same as `--base-path` when launching the proxy server (see server.py).
+    Required when url is not set."""
-    # How many threads to have at once
     parallelism: int
+    """How many threads to have at once"""
-    # Whether to skip execution
     dry_run: bool = False
+    """Whether to skip execution"""
+    sqlite_cache_backend_config: Optional[SqliteCacheBackendConfig] = None
+    """If set, SQLite will be used for the cache.
+    This specifies the directory in which the SQLite cache will store files.
+    At most one of sqlite_cache_backend_config and mongo_cache_backend_config can be set."""
-    # URL to the MongoDB database.
-    # If non-empty, the MongoDB database will be used for caching instead of SQLite.
-    # Example format: mongodb://[username:password@]host1[:port1]/[dbname]
-    # For full format, see: https://www.mongodb.com/docs/manual/reference/connection-string/
-    mongo_uri: str = ""
+    mongo_cache_backend_config: Optional[MongoCacheBackendConfig] = None
+    """If set, MongoDB will be used for the cache.
+    This specifies the MongoDB database to be used by the MongoDB cache.
+    At most one of sqlite_cache_backend_config and mongo_cache_backend_config can be set."""
 class Executor:
@@ -51,6 +65,16 @@ class Executor:
     def __init__(self, execution_spec: ExecutionSpec):
         self.execution_spec = execution_spec
+        cache_backend_config: CacheBackendConfig
+        if execution_spec.sqlite_cache_backend_config and execution_spec.mongo_cache_backend_config:
+            raise ExecutorError("At most one of sqlite_cache_backend_config and mongo_cache_backend_config can be set.")
+        elif execution_spec.sqlite_cache_backend_config:
+            cache_backend_config = execution_spec.sqlite_cache_backend_config
+        elif execution_spec.mongo_cache_backend_config:
+            cache_backend_config = execution_spec.mongo_cache_backend_config
+        else:
+            cache_backend_config = BlackHoleCacheBackendConfig()
         self.service: Service
         if execution_spec.url:
             hlog(f"Running using remote API proxy server: {execution_spec.url}")
@@ -58,7 +82,9 @@ class Executor:
         elif execution_spec.local_path:
             hlog(f"Running in local mode with base path: {execution_spec.local_path}")
             self.service = ServerService(
-                base_path=execution_spec.local_path, root_mode=True, mongo_uri=execution_spec.mongo_uri
+                base_path=execution_spec.local_path,
+                root_mode=True,
+                cache_backend_config=cache_backend_config,
             )
         else:
             raise ValueError("Either the proxy server URL or the local path must be set")
@@ -77,7 +103,11 @@ class Executor:
         )
         hlog(f"Processed {len(request_states)} requests")
-        return ScenarioState(scenario_state.adapter_spec, request_states)
+        return ScenarioState(
+            adapter_spec=scenario_state.adapter_spec,
+            request_states=request_states,
+            annotator_specs=scenario_state.annotator_specs,
+        )
     def process(self, state: RequestState) -> RequestState:
         try:
@@ -87,7 +117,7 @@ class Executor:
         if not result.success:
             if result.error_flags and not result.error_flags.is_fatal:
                 hlog(f"WARNING: Non-fatal error treated as empty completion: {result.error}")
-                result.completions = [Sequence(text="", logprob=0, tokens=[])]
+                result.completions = [GeneratedOutput(text="", logprob=0, tokens=[])]
             else:
                 raise ExecutorError(f"{str(result.error)} Request: {state.request}")
         return replace(state, result=result)

helm/benchmark/huggingface_registration.py CHANGED Viewed

@@ -4,10 +4,16 @@ from typing import Optional
 from helm.benchmark.model_deployment_registry import (
     ClientSpec,
     ModelDeployment,
-    WindowServiceSpec,
     register_model_deployment,
 )
+from helm.benchmark.model_metadata_registry import (
+    get_model_metadata,
+    get_unknown_model_metadata,
+    register_model_metadata,
+)
 from helm.benchmark.tokenizer_config_registry import TokenizerConfig, TokenizerSpec, register_tokenizer_config
+from helm.common.hierarchical_logger import hlog
+from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
 def register_huggingface_model(
@@ -17,26 +23,50 @@ def register_huggingface_model(
     if revision:
         object_spec_args["revision"] = revision
+    # Auto-infer model properties from the tokenizer.
+    with HuggingFaceTokenizer.create_tokenizer(**object_spec_args) as tokenizer:
+        max_sequence_length = tokenizer.model_max_length
+        end_of_text_token = tokenizer.eos_token or ""
+        prefix_token = tokenizer.bos_token or ""
+    # If the tokenizer config has a model_max_length of 1000000000000000019884624838656
+    # it means that model creator did not specify model_max_length.
+    if max_sequence_length > 1_000_000:
+        raise ValueError(
+            f"Could not infer the model_max_length of Hugging Face model {pretrained_model_name_or_path}, so "
+            f"--enable-huggingface-models and --enable-local-huggingface-models cannot be used for this model. "
+            f"Please configure the model using prod_env/model_deployments.yaml instead."
+        )
     model_deployment = ModelDeployment(
         name=helm_model_name,
         client_spec=ClientSpec(
-            class_name="helm.proxy.clients.huggingface_client.HuggingFaceClient",
+            class_name="helm.clients.huggingface_client.HuggingFaceClient",
             args=object_spec_args,
         ),
         model_name=helm_model_name,
         tokenizer_name=helm_model_name,
-        window_service_spec=WindowServiceSpec(
-            class_name="helm.benchmark.window_services.huggingface_window_service.HuggingFaceWindowService",
-            args=object_spec_args,
-        ),
+        max_sequence_length=max_sequence_length,
     )
+    # We check if the model is already registered because we don't want to
+    # overwrite the model metadata if it's already registered.
+    # If it's not registered, we register it, as otherwise an error would be thrown
+    # when we try to register the model deployment.
+    try:
+        _ = get_model_metadata(model_name=helm_model_name)
+    except ValueError:
+        register_model_metadata(get_unknown_model_metadata(helm_model_name))
+        hlog(f"Registered default metadata for model {helm_model_name}")
     register_model_deployment(model_deployment)
     tokenizer_config = TokenizerConfig(
         name=helm_model_name,
         tokenizer_spec=TokenizerSpec(
-            class_name="helm.proxy.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer",
+            class_name="helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer",
             args=object_spec_args,
         ),
+        end_of_text_token=end_of_text_token,
+        prefix_token=prefix_token,
     )
     register_tokenizer_config(tokenizer_config)

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl