PyPI - crfm-helm - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl - Mend

crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (499) hide show

helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png ADDED Viewed

Binary file

helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png ADDED Viewed

Binary file

helm/benchmark/static_build/assets/vhelm-model-6d812526.png ADDED Viewed

Binary file

helm/benchmark/static_build/assets/yandex-38e09d70.png ADDED Viewed

Binary file

helm/benchmark/static_build/config.js ADDED Viewed

@@ -0,0 +1,4 @@
+window.RELEASE = "v1.0.0";
+window.BENCHMARK_OUTPUT_BASE_URL =
+	"https://storage.googleapis.com/crfm-helm-public/lite/benchmark_output/";
+window.PROJECT_ID = "lite";

helm/benchmark/static_build/index.html ADDED Viewed

@@ -0,0 +1,20 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="./helm.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Holistic Evaluation of Language Models (HELM)</title>
+    <meta name="description" content="The Holistic Evaluation of Language Models (HELM) serves as a living benchmark for transparency in language models. Providing broad coverage and recognizing incompleteness, multi-metric measurements, and standardization. All data and analysis are freely accessible on the website for exploration and study." />
+    <script type="text/javascript" src="./config.js"></script>
+    <script type="module" crossorigin src="./assets/index-737eef9e.js"></script>
+    <link rel="modulepreload" crossorigin href="./assets/react-d4a0b69b.js">
+    <link rel="modulepreload" crossorigin href="./assets/recharts-6d337683.js">
+    <link rel="modulepreload" crossorigin href="./assets/tremor-54a99cc4.js">
+    <link rel="stylesheet" href="./assets/index-878a1094.css">
+  </head>
+  <body class="block">
+    <div id="root"></div>
+  </body>
+</html>

helm/benchmark/test_data_preprocessor.py CHANGED Viewed

@@ -4,14 +4,14 @@ from typing import List
 from helm.benchmark.augmentations.data_augmenter import DataAugmenterSpec
 from helm.benchmark.augmentations.perturbation import PerturbationSpec
 from helm.benchmark.data_preprocessor import DataPreprocessor
-from helm.benchmark.run_specs import get_scenario_spec1
+from helm.benchmark.run_specs.simple_run_specs import get_simple1_spec
 from helm.benchmark.scenarios.scenario import create_scenario, Instance, Scenario, with_instance_ids
 def test_data_preprocessor():
     # Test that each Instance is given a unique ID and is preserved through data augmentation
     data_preprocessor = DataPreprocessor(DataAugmenterSpec())
-    scenario: Scenario = create_scenario(get_scenario_spec1())
+    scenario: Scenario = create_scenario(get_simple1_spec().scenario_spec)
     instances = with_instance_ids(scenario.get_instances(output_path=""))
     instances: List[Instance] = data_preprocessor.preprocess(instances)
     for i, instance in enumerate(instances):
@@ -32,7 +32,7 @@ def test_data_preprocessor_with_data_augmentation():
         should_include_original_eval=True,
     )
     data_preprocessor = DataPreprocessor(data_augmenter_spec)
-    scenario: Scenario = create_scenario(get_scenario_spec1())
+    scenario: Scenario = create_scenario(get_simple1_spec().scenario_spec)
     instances = with_instance_ids(scenario.get_instances(output_path=""))
     instances: List[Instance] = data_preprocessor.preprocess(instances)
     assert len(instances) == 10 + 10 + 10  # original train + original eval + perturbed eval

helm/benchmark/test_run_expander.py CHANGED Viewed

@@ -2,7 +2,7 @@ import unittest
 from helm.benchmark.adaptation.adapter_spec import AdapterSpec
 from helm.benchmark.run_expander import IncreaseMaxTokensRunExpander
-from helm.benchmark.runner import RunSpec
+from helm.benchmark.run_spec import RunSpec
 from helm.benchmark.scenarios.scenario import ScenarioSpec

helm/benchmark/window_services/ai21_window_service.py CHANGED Viewed

@@ -9,12 +9,11 @@ from helm.common.tokenization_request import (
     TokenizationToken,
     TextRange,
 )
-from .window_service import WindowService, EncodeResult
+from .window_service import ConfigurableWindowService, EncodeResult, WindowService
 from .tokenizer_service import TokenizerService
-from .gpt2_window_service import GPT2WindowService
-class AI21WindowService(WindowService):
+class AI21WindowService(ConfigurableWindowService):
     """Tokenizes by making a request to the proxy server with REST endpoint: `/api/tokenize`."""
     # AI21's tokenizer API rejects a tokenization request if the input sequence is too long, so
@@ -32,39 +31,29 @@ class AI21WindowService(WindowService):
         "AI21 only gave API access to their tokenizer, so this method is not supported."
     )
-    def __init__(self, service: TokenizerService, gpt2_window_service: GPT2WindowService):
+    def __init__(
+        self,
+        gpt2_window_service: WindowService,
+        service: TokenizerService,
+        tokenizer_name: str,
+        max_sequence_length: int,
+        max_request_length: Optional[int] = None,
+        max_sequence_and_generated_tokens_length: Optional[int] = None,
+        end_of_text_token: Optional[str] = None,
+        prefix_token: Optional[str] = None,
+    ):
+        super().__init__(
+            tokenizer_name=tokenizer_name,
+            max_sequence_length=max_sequence_length,
+            max_request_length=max_request_length,
+            max_sequence_and_generated_tokens_length=max_sequence_and_generated_tokens_length,
+            end_of_text_token=end_of_text_token,
+            prefix_token=prefix_token,
+        )
         # We need the `TokenizerService` to make requests to the server.
         self.service: TokenizerService = service
         # As explained above, we need a `GPT2WindowService` to help tokenize long text sequences.
-        self.gpt2_window_service: GPT2WindowService = gpt2_window_service
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "ai21/j1"
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        The max token length of the model in. The AI21 server automatically prepends a token to every prompt,
-        so the actual max sequence length is 2048-1 = 2047.
-        """
-        return 2047
-    @property
-    def max_request_length(self) -> int:
-        """The max sequence length is the same as the max request length for AI21."""
-        return self.max_sequence_length
-    @property
-    def end_of_text_token(self) -> str:
-        # TODO: I'm not sure what their end of text token is. I don't think it's documented.
-        return " "
-    @property
-    def prefix_token(self) -> str:
-        """AI21 tokenizers do no have a prefix token"""
-        return ""
+        self.gpt2_window_service: WindowService = gpt2_window_service
     def encode(self, text: str, truncation: bool = False, max_length: Optional[int] = None) -> EncodeResult:
         """

helm/benchmark/window_services/cohere_window_service.py CHANGED Viewed

@@ -1,8 +1,7 @@
 from typing import List, Optional
-from helm.proxy.tokenizers.cohere_tokenizer import CohereTokenizer
+from helm.tokenizers.cohere_tokenizer import CohereTokenizer
 from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
 from .window_service import EncodeResult
 from helm.common.tokenization_request import (
     TokenizationRequest,
@@ -12,47 +11,6 @@ from helm.common.tokenization_request import (
 class CohereWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-    @property
-    def tokenizer_name(self) -> str:
-        return "cohere/cohere"
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        The max length of the model input. Similar to MT-NLG, Cohere does not predict the logprob of
-        the first input token so `max_sequence_length` is one token shorter than `max_request_length`.
-        """
-        return self.max_request_length - 1
-    @property
-    def max_request_length(self) -> int:
-        """
-        The max request length of the model. For Cohere, this is the same as the `max_sequence_length`.
-        If we exceed the `max_sequence_length`, we get the following error:
-        Request failed with too many tokens: total number of tokens (prompt and prediction) cannot
-        exceed 2048 - received 2049. Try using a shorter prompt or a smaller max_tokens value.
-        """
-        return 2048
-    @property
-    def end_of_text_token(self) -> str:
-        """
-        The end of text token. Cohere does not have one.
-        """
-        return ""
-    @property
-    def prefix_token(self) -> str:
-        """
-        The prefix token. Cohere does not return the log prob for the first token when `echo_prompt` is True.
-        """
-        # Cohere recommended ":", but we can try out different values
-        return ":"
     def encode(self, text: str, truncation: bool = False, max_length: Optional[int] = None) -> EncodeResult:
         """
         Encodes the input text to tokens.
@@ -141,23 +99,3 @@ class CohereWindowService(LocalWindowService):
             result = result[:-1]
         return result
-class CohereCommandWindowService(CohereWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-    @property
-    def max_request_length(self) -> int:
-        """
-        The max request length of the model. For Cohere, this is the same as the `max_sequence_length`.
-        If we exceed the `max_sequence_length`, we get the following error:
-        Request failed with too many tokens: total number of tokens (prompt and prediction) cannot
-        exceed 2048 - received 2049. Try using a shorter prompt or a smaller max_tokens value.
-        For the Command model, in rare situations, the co.tokenize returns a shorter list of tokens
-        than the co.generate. This causes sequence length errors for rare inputs. Cohere's advice is
-        to reduce the sequence length to 2020 to avoid these issues.
-        """
-        return 2020

helm/benchmark/window_services/default_window_service.py CHANGED Viewed

@@ -1,48 +1,6 @@
-from typing import Optional
-from .window_service import INT_MAX
 from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
 class DefaultWindowService(LocalWindowService):
-    def __init__(
-        self,
-        service: TokenizerService,
-        tokenizer_name: str,
-        max_sequence_length: int,
-        max_request_length: Optional[int] = None,
-        max_sequence_and_generated_tokens_length: Optional[int] = None,
-        end_of_text_token: Optional[str] = None,
-        prefix_token: Optional[str] = None,
-    ):
-        super().__init__(service)
-        self._tokenizer_name = tokenizer_name
-        self._max_sequence_length = max_sequence_length
-        self._max_request_length = max_request_length or max_sequence_length
-        self._max_sequence_and_generated_tokens_length = max_sequence_and_generated_tokens_length or INT_MAX
-        self._end_of_text_token = end_of_text_token or ""
-        self._prefix_token = prefix_token or ""
-    @property
-    def tokenizer_name(self) -> str:
-        return self._tokenizer_name
-    @property
-    def max_sequence_length(self) -> int:
-        return self._max_sequence_length
-    @property
-    def max_request_length(self) -> int:
-        return self._max_request_length
-    @property
-    def max_sequence_and_generated_tokens_length(self) -> int:
-        return self._max_sequence_and_generated_tokens_length
-    @property
-    def end_of_text_token(self) -> str:
-        return self._end_of_text_token
-    @property
-    def prefix_token(self) -> str:
-        return self._prefix_token
+    # TODO: Delete this WindowService.
+    pass

helm/benchmark/window_services/encoder_decoder_window_service.py CHANGED Viewed

@@ -2,20 +2,9 @@ from abc import ABC
 from helm.common.hierarchical_logger import hlog
 from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
 class EncoderDecoderWindowService(LocalWindowService, ABC):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-    @property
-    def max_request_length(self) -> int:
-        """
-        Return the max request length. We set the max requests length to be `max_sequence_length`.
-        """
-        return self.max_sequence_length
     @property
     def max_output_length(self) -> int:
         """

helm/benchmark/window_services/ice_window_service.py CHANGED Viewed

@@ -1,41 +1,7 @@
 from .local_window_service import LocalWindowService
-from .tokenizer_service import TokenizerService
 class ICEWindowService(LocalWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-    @property
-    def tokenizer_name(self) -> str:
-        return "TsinghuaKEG/ice"
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        The max length of the model input.
-        According to https://github.com/THUDM/GLM-130B, the max sequence length is 2048.
-        """
-        return 2048
-    @property
-    def max_request_length(self) -> int:
-        return self.max_sequence_length + 1
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        # Followed up in https://github.com/THUDM/icetk/issues/1
-        return "</s>"
-    @property
-    def prefix_token(self) -> str:
-        """
-        The prefix token.
-        Inference with echo=True is not feasible, so just set it to the empty string.
-        """
-        return ""
     def truncate_from_right(self, text: str, expected_completion_token_length: int = 0) -> str:
         """
         Truncates text from the right to fit within the context window given by `max_request_length`

helm/benchmark/window_services/image_generation/__init__.py ADDED Viewed

File without changes

helm/benchmark/window_services/image_generation/clip_window_service.py ADDED Viewed

@@ -0,0 +1,15 @@
+from abc import ABC
+from helm.benchmark.window_services.local_window_service import LocalWindowService
+class CLIPWindowService(LocalWindowService, ABC):
+    def truncate_from_right(self, text: str, expected_completion_token_length: int = 0) -> str:
+        result: str = self.decode(self.encode(text, truncation=True, max_length=self.max_request_length).tokens)
+        # HACK: For the vast majority of cases, the above logic works, but there are a few where the
+        # token count exceeds `max_length` by 1.
+        while not self.fits_within_context_window(result):
+            result = result[:-1]
+        return result

helm/benchmark/window_services/image_generation/lexica_search_window_service.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .clip_window_service import CLIPWindowService
+class LexicaSearchWindowService(CLIPWindowService):
+    def fits_within_context_window(self, text: str, expected_completion_token_length: int = 0) -> bool:
+        return len(text) <= self.max_sequence_length
+    def truncate_from_right(self, text: str, expected_completion_token_length: int = 0) -> str:
+        return text[: self.max_sequence_length]

helm/benchmark/window_services/image_generation/openai_dalle_window_service.py ADDED Viewed

@@ -0,0 +1,9 @@
+from .clip_window_service import CLIPWindowService
+class OpenAIDALLEWindowService(CLIPWindowService):
+    def fits_within_context_window(self, text: str, expected_completion_token_length: int = 0) -> bool:
+        return len(text) <= self.max_sequence_length
+    def truncate_from_right(self, text: str, expected_completion_token_length: int = 0) -> str:
+        return text[: self.max_sequence_length]

helm/benchmark/window_services/image_generation/test_clip_window_service.py ADDED Viewed

@@ -0,0 +1,29 @@
+import shutil
+import tempfile
+from helm.benchmark.window_services.tokenizer_service import TokenizerService
+from helm.benchmark.window_services.test_utils import get_tokenizer_service
+from helm.benchmark.window_services.window_service_factory import WindowServiceFactory
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
+class TestCLIPWindowService:
+    def setup_method(self):
+        self.path: str = tempfile.mkdtemp()
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
+        self.window_service = WindowServiceFactory.get_window_service("huggingface/dreamlike-photoreal-v2-0", service)
+    def teardown_method(self, method):
+        shutil.rmtree(self.path)
+    def test_truncate_from_right(self):
+        example_text: str = (
+            "an instqrumemnt used for cutting cloth , paper , axdz othr thdin mteroial , "
+            "consamistng of two blades lad one on tvopb of the other and fhastned in tle mixdqdjle "
+            "so as to bllow them txo be pened and closed by thumb and fitngesr inserted tgrough rings on"
+        )
+        assert not self.window_service.fits_within_context_window(example_text)
+        # Truncate and ensure it fits within the context window
+        truncated_prompt: str = self.window_service.truncate_from_right(example_text)
+        assert self.window_service.fits_within_context_window(truncated_prompt)

helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py ADDED Viewed

@@ -0,0 +1,30 @@
+import shutil
+import tempfile
+from helm.benchmark.window_services.tokenizer_service import TokenizerService
+from helm.clients.image_generation.dalle2_client import DALLE2Client
+from helm.benchmark.window_services.test_utils import get_tokenizer_service, TEST_PROMPT
+from helm.benchmark.window_services.window_service_factory import WindowServiceFactory
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
+class TestOpenAIDALLEWindowService:
+    def setup_method(self):
+        self.path: str = tempfile.mkdtemp()
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
+        self.window_service = WindowServiceFactory.get_window_service("openai/dall-e-2", service)
+    def teardown_method(self, method):
+        shutil.rmtree(self.path)
+    def test_fits_within_context_window(self):
+        assert self.window_service.fits_within_context_window(TEST_PROMPT)
+    def test_truncate_from_right(self):
+        long_prompt: str = TEST_PROMPT * 10
+        assert not self.window_service.fits_within_context_window(long_prompt)
+        # Truncate and ensure it fits within the context window
+        truncated_long_prompt: str = self.window_service.truncate_from_right(long_prompt)
+        assert len(truncated_long_prompt) == DALLE2Client.MAX_PROMPT_LENGTH
+        assert self.window_service.fits_within_context_window(truncated_long_prompt)

helm/benchmark/window_services/local_window_service.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from abc import ABC
 from typing import List, Optional, cast
-from .window_service import WindowService, EncodeResult
+from .window_service import ConfigurableWindowService, EncodeResult
 from .tokenizer_service import TokenizerService
 from helm.common.tokenization_request import (
     DecodeRequest,
@@ -10,11 +10,28 @@ from helm.common.tokenization_request import (
     TokenizationRequestResult,
     TokenizationToken,
 )
-from helm.proxy.clients.client import cleanup_tokens
+from helm.clients.client import cleanup_tokens
-class LocalWindowService(WindowService, ABC):
-    def __init__(self, service: TokenizerService):
+class LocalWindowService(ConfigurableWindowService, ABC):
+    def __init__(
+        self,
+        service: TokenizerService,
+        tokenizer_name: str,
+        max_sequence_length: int,
+        max_request_length: Optional[int] = None,
+        max_sequence_and_generated_tokens_length: Optional[int] = None,
+        end_of_text_token: Optional[str] = None,
+        prefix_token: Optional[str] = None,
+    ):
+        super().__init__(
+            tokenizer_name=tokenizer_name,
+            max_sequence_length=max_sequence_length,
+            max_request_length=max_request_length,
+            max_sequence_and_generated_tokens_length=max_sequence_and_generated_tokens_length,
+            end_of_text_token=end_of_text_token,
+            prefix_token=prefix_token,
+        )
         self.service: TokenizerService = service
     def encode(self, text: str, truncation: bool = False, max_length: Optional[int] = None) -> EncodeResult:

helm/benchmark/window_services/test_anthropic_window_service.py CHANGED Viewed

@@ -2,6 +2,7 @@ import shutil
 import tempfile
 from typing import List
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
 from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -120,7 +121,7 @@ class TestAnthropicWindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
         self.window_service = WindowServiceFactory.get_window_service("anthropic/claude-v1.3", service)
     def teardown_method(self, method):

helm/benchmark/window_services/test_bloom_window_service.py CHANGED Viewed

@@ -2,6 +2,7 @@ import shutil
 import tempfile
 from typing import List
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
 from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -64,7 +65,7 @@ class TestBloomWindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
         self.window_service = WindowServiceFactory.get_window_service("together/bloom", service)
     def teardown_method(self, method):

helm/benchmark/window_services/test_cohere_window_service.py CHANGED Viewed

@@ -6,6 +6,7 @@ from typing import List
 from sqlitedict import SqliteDict
+from helm.common.cache_backend_config import SqliteCacheBackendConfig
 from helm.common.general import ensure_directory_exists
 from .test_cohere_window_service_utils import REQUESTS_TO_RESPONSES, TEST_PROMPT, TOKENIZED_PROMPT
 from .tokenizer_service import TokenizerService
@@ -30,7 +31,7 @@ class TestCohereWindowService:
         with open(os.path.join(cls.path, "credentials.conf"), "w") as f:
             f.write("cohereApiKey: secret")
-        service: TokenizerService = get_tokenizer_service(cls.path)
+        service: TokenizerService = get_tokenizer_service(cls.path, SqliteCacheBackendConfig(cache_path))
         cls.window_service = WindowServiceFactory.get_window_service("cohere/xlarge-20220609", service)
         cls.prompt: str = TEST_PROMPT
         cls.tokenized_prompt: List[str] = TOKENIZED_PROMPT

helm/benchmark/window_services/test_flan_t5_window_service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import tempfile
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from helm.benchmark.window_services.test_t511b_window_service import TestT511bWindowService
 from helm.benchmark.window_services.window_service_factory import TokenizerService, WindowServiceFactory
 from helm.benchmark.window_services.test_utils import get_tokenizer_service
@@ -8,5 +9,5 @@ from helm.benchmark.window_services.test_utils import get_tokenizer_service
 class TestFlanT5WindowService(TestT511bWindowService):
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
         self.window_service = WindowServiceFactory.get_window_service("together/flan-t5-xxl", service)

helm/benchmark/window_services/test_gpt2_window_service.py CHANGED Viewed

@@ -2,7 +2,7 @@ import shutil
 import tempfile
 from helm.benchmark.window_services.tokenizer_service import TokenizerService
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS
 from .window_service_factory import WindowServiceFactory
@@ -10,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
 class TestGPT2WindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
         self.window_service = WindowServiceFactory.get_window_service("huggingface/gpt2", service)
     def teardown_method(self, method):

helm/benchmark/window_services/test_gpt4_window_service.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import shutil
 import tempfile
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT4_TEST_TOKEN_IDS, GPT4_TEST_TOKENS
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
@@ -9,7 +10,7 @@ from .window_service_factory import WindowServiceFactory
 class TestOpenAIWindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
         self.window_service = WindowServiceFactory.get_window_service("openai/gpt-3.5-turbo-0301", service)
     def teardown_method(self, method):

helm/benchmark/window_services/test_gptj_window_service.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import shutil
 import tempfile
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
 from .test_utils import get_tokenizer_service, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS, TEST_PROMPT
@@ -9,8 +10,8 @@ from .test_utils import get_tokenizer_service, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN
 class TestGPTJWindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
-        self.window_service = WindowServiceFactory.get_window_service("together/gpt-j-6b", service)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
+        self.window_service = WindowServiceFactory.get_window_service("huggingface/gpt-j-6b", service)
     def teardown_method(self, method):
         shutil.rmtree(self.path)

helm/benchmark/window_services/test_gptneox_window_service.py CHANGED Viewed

@@ -2,6 +2,7 @@ import shutil
 import tempfile
 from typing import List
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
 from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -65,8 +66,8 @@ class TestGPTNeoXWindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
-        self.window_service = WindowServiceFactory.get_window_service("together/gpt-neox-20b", service)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
+        self.window_service = WindowServiceFactory.get_window_service("huggingface/gpt-neox-20b", service)
     def teardown_method(self, method):
         shutil.rmtree(self.path)

helm/benchmark/window_services/test_ice_window_service.py CHANGED Viewed

@@ -2,6 +2,7 @@ import shutil
 import tempfile
 from typing import List
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from .tokenizer_service import TokenizerService
 from .window_service_factory import WindowServiceFactory
 from .test_utils import get_tokenizer_service, TEST_PROMPT
@@ -64,7 +65,7 @@ class TestICEWindowService:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
+        service: TokenizerService = get_tokenizer_service(self.path, BlackHoleCacheBackendConfig())
         self.window_service = WindowServiceFactory.get_window_service("together/glm", service)
     def teardown_method(self, method):

crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

Potentially problematic release.

crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl