crfm-helm 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +10 -8
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +50 -37
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +1 -0
- helm/benchmark/__init__.py +2 -0
- helm/benchmark/adaptation/adapter_spec.py +3 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
- helm/benchmark/contamination/__init__.py +0 -0
- helm/benchmark/metrics/classification_metrics.py +28 -23
- helm/benchmark/metrics/test_classification_metrics.py +44 -9
- helm/benchmark/presentation/create_plots.py +617 -0
- helm/benchmark/presentation/summarize.py +4 -2
- helm/benchmark/presentation/test_create_plots.py +32 -0
- helm/benchmark/run.py +23 -1
- helm/benchmark/run_expander.py +161 -47
- helm/benchmark/run_specs.py +84 -10
- helm/benchmark/runner.py +31 -3
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/imdb_listdir.json +50014 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +58 -17
- helm/benchmark/scenarios/lextreme_scenario.py +37 -25
- helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
- helm/benchmark/scenarios/scenario.py +5 -0
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/static/benchmarking.css +14 -0
- helm/benchmark/static/benchmarking.js +43 -0
- helm/benchmark/static/index.html +2 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/plot-captions.js +16 -0
- helm/benchmark/static/schema.yaml +66 -8
- helm/benchmark/window_services/cohere_window_service.py +20 -0
- helm/benchmark/window_services/flan_t5_window_service.py +29 -0
- helm/benchmark/window_services/huggingface_window_service.py +39 -0
- helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
- helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
- helm/benchmark/window_services/window_service_factory.py +27 -6
- helm/common/general.py +12 -5
- helm/proxy/clients/aleph_alpha_client.py +47 -28
- helm/proxy/clients/auto_client.py +28 -24
- helm/proxy/clients/huggingface_client.py +30 -17
- helm/proxy/clients/huggingface_model_registry.py +111 -0
- helm/proxy/clients/huggingface_tokenizer.py +23 -7
- helm/proxy/clients/openai_client.py +60 -2
- helm/proxy/clients/test_huggingface_model_registry.py +57 -0
- helm/proxy/clients/together_client.py +17 -2
- helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
- helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
- helm/proxy/models.py +82 -2
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0
helm/benchmark/static/schema.yaml
CHANGED
@@ -30,6 +30,27 @@ models:
     access: limited
     num_parameters: 17000000000
     release_date: 2022-10-28
+  - name: ai21/j2-jumbo
+    display_name: Jurassic-2 Jumbo (178B)
+    description: Jurassic-2 Jumbo (178B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
+    creator_organization: AI21 Labs
+    access: limited
+    num_parameters: 178000000000
+    release_date: 2023-03-09
+  - name: ai21/j2-grande
+    display_name: Jurassic-2 Grande (17B)
+    description: Jurassic-2 Grande (17B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
+    creator_organization: AI21 Labs
+    access: limited
+    num_parameters: 17000000000
+    release_date: 2023-03-09
+  - name: ai21/j2-large
+    display_name: Jurassic-2 Large (7.5B)
+    description: Jurassic-2 Large (7.5B parameters) ([docs](https://www.ai21.com/blog/introducing-j2))
+    creator_organization: AI21 Labs
+    access: limited
+    num_parameters: 7500000000
+    release_date: 2023-03-09
 
   # Aleph Alpha
   # TODO: add Luminous World when it's released
@@ -142,9 +163,17 @@ models:
     access: limited
     num_parameters: 6100000000
     release_date: 2022-11-08
-  - name: cohere/command-
-    display_name: Cohere
-    description: Cohere
+  - name: cohere/command-medium-beta
+    display_name: Cohere Command beta (6.1B)
+    description: Cohere Command beta (6.1B parameters) is fine-tuned from the medium model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
+    creator_organization: Cohere
+    access: limited
+    num_parameters: 6100000000
+    release_date: 2022-11-08
+    todo: true
+  - name: cohere/command-xlarge-beta
+    display_name: Cohere Command beta (52.4B)
+    description: Cohere Command beta (52.4B parameters) is fine-tuned from the XL model to respond well with instruction-like prompts ([details](https://docs.cohere.ai/docs/command-beta)).
     creator_organization: Cohere
     access: limited
     num_parameters: 52400000000
@@ -203,7 +232,6 @@ models:
     description: Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks ([paper](https://arxiv.org/pdf/2210.11416.pdf)).
     creator_organization: Google
     access: open
-    todo: true
 
   - name: google/palm
     display_name: PaLM (540B)
@@ -379,6 +407,12 @@ models:
     description: Codex-style model that is a stronger, multilingual version of the Codex (12B) model in the [Codex paper](https://arxiv.org/pdf/2107.03374.pdf).
     creator_organization: OpenAI
     access: limited
+  - name: openai/gpt-3.5-turbo-0301
+    display_name: gpt-3.5-turbo-0301
+    description: Sibling model of text-davinci-003 is optimized for chat but works well for traditional completions tasks as well. Snapshot from 2023-03-01.
+    creator_organization: OpenAI
+    access: limited
+    release_date: 2023-03-01
   - name: openai/chat-gpt
     display_name: ChatGPT
     description: Sibling model to InstructGPT which interacts in a conversational way. See [OpenAI's announcement](https://openai.com/blog/chatgpt/). The size of the model is unknown.
@@ -396,6 +430,14 @@ models:
     num_parameters: 6700000000
     release_date: 2022-11-29
     todo: true
+  - name: together/gpt-neoxt-chat-base-20b
+    display_name: GPT-NeoXT-Chat-Base (20B)
+    description: GPT-NeoXT-Chat-Base (20B) is fine-tuned from GPT-NeoX, serving as a base model for developing open-source chatbots.
+    creator_organization: Together
+    access: open
+    num_parameters: 20000000000
+    release_date: 2023-03-08
+    todo: true
 
   # Salesforce
   - name: together/codegen
@@ -634,6 +676,14 @@ metrics:
     display_name: F1
     description: Average F1 score in terms of word overlap between the model output and correct reference.
     lower_is_better: false
+  - name: classification_macro_f1
+    display_name: Macro-F1
+    description: Population-level macro-averaged F1 score.
+    lower_is_better: false
+  - name: classification_micro_f1
+    display_name: Micro-F1
+    description: Population-level micro-averaged F1 score.
+    lower_is_better: false
   - name: absolute_value_difference
     display_name: Absolute difference
     short_display_name: Diff.
@@ -1165,6 +1215,14 @@ metric_groups:
       - name: monte_carlo_entropy
         split: ${main_split}
 
+  - name: classification_metrics
+    display_name: Classification metrics
+    metrics:
+      - name: classification_macro_f1
+        split: ${main_split}
+      - name: classification_micro_f1
+        split: ${main_split}
+
 ############################################################
 run_groups:
   ## Top-level
@@ -2106,24 +2164,24 @@ run_groups:
     display_name: LEXTREME
     description: A Multilingual Legal Benchmark for Natural Language Understanding
     metric_groups:
-      -
+      - classification_metrics
       - calibration
       - efficiency
       - general_information
     environment:
-      main_name:
+      main_name: classification_macro_f1
       main_split: test
 
   - name: lex_glue
     display_name: LexGLUE
     description: A Benchmark Dataset for Legal Language Understanding in English
     metric_groups:
-      -
+      - classification_metrics
       - calibration
       - efficiency
       - general_information
     environment:
-      main_name:
+      main_name: classification_macro_f1
       main_split: test
 
   - name: entity_data_imputation
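The `classification_macro_f1` and `classification_micro_f1` metrics added above are population-level F1 aggregates that LEXTREME and LexGLUE now use as their main metric. As a rough illustration only (HELM computes these in helm/benchmark/metrics/classification_metrics.py; this sketch uses scikit-learn purely to show the two averaging modes, and the labels are made up):

# Illustrative sketch of macro- vs micro-averaged F1; not HELM's implementation.
from sklearn.metrics import f1_score

y_true = ["contract", "tort", "contract", "property", "tort"]
y_pred = ["contract", "contract", "contract", "property", "tort"]

macro_f1 = f1_score(y_true, y_pred, average="macro")  # unweighted mean of per-class F1
micro_f1 = f1_score(y_true, y_pred, average="micro")  # computed from global TP/FP/FN counts

print(f"Macro-F1: {macro_f1:.3f}, Micro-F1: {micro_f1:.3f}")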
helm/benchmark/window_services/cohere_window_service.py
CHANGED
@@ -141,3 +141,23 @@ class CohereWindowService(LocalWindowService):
             result = result[:-1]
 
         return result
+
+
+class CohereCommandWindowService(CohereWindowService):
+    def __init__(self, service: TokenizerService):
+        super().__init__(service)
+
+    @property
+    def max_request_length(self) -> int:
+        """
+        The max request length of the model. For Cohere, this is the same as the `max_sequence_length`.
+        If we exceed the `max_sequence_length`, we get the following error:
+
+        Request failed with too many tokens: total number of tokens (prompt and prediction) cannot
+        exceed 2048 - received 2049. Try using a shorter prompt or a smaller max_tokens value.
+
+        For the Command model, in rare situations, the co.tokenize returns a shorter list of tokens
+        than the co.generate. This causes sequence length errors for rare inputs. Cohere's advice is
+        to reduce the sequence length to 2020 to avoid these issues.
+        """
+        return 2020
helm/benchmark/window_services/flan_t5_window_service.py
ADDED
@@ -0,0 +1,29 @@
+from .encoder_decoder_window_service import EncoderDecoderWindowService
+from .tokenizer_service import TokenizerService
+
+
+class FlanT5WindowService(EncoderDecoderWindowService):
+    def __init__(self, service: TokenizerService):
+        super().__init__(service)
+
+    @property
+    def max_sequence_length(self) -> int:
+        """Return the max sequence length."""
+        # We subtract 1 to account for <extra_id_0> that gets appended to prompts.
+        return 512 - 1
+
+    @property
+    def end_of_text_token(self) -> str:
+        """The end of text token."""
+        return "</s>"
+
+    @property
+    def tokenizer_name(self) -> str:
+        """Name of the tokenizer to use when sending a request."""
+        return "google/flan-t5-xxl"
+
+    @property
+    def prefix_token(self) -> str:
+        """The prefix token is the same as the end of text token."""
+        # echo=True is not supported
+        return ""
helm/benchmark/window_services/huggingface_window_service.py
ADDED
@@ -0,0 +1,39 @@
+from helm.proxy.clients.huggingface_tokenizer import HuggingFaceTokenizers
+from .local_window_service import LocalWindowService
+from .tokenizer_service import TokenizerService
+from helm.proxy.clients.huggingface_client import HuggingFaceModelConfig
+
+
+class HuggingFaceWindowService(LocalWindowService):
+    def __init__(self, service: TokenizerService, model_config: HuggingFaceModelConfig):
+        super().__init__(service)
+        self._tokenizer_name = str(model_config)
+        tokenizer = HuggingFaceTokenizers.get_tokenizer(self._tokenizer_name)
+        self._prefix_token = tokenizer.bos_token
+        self._end_of_text_token = tokenizer.eos_token
+        self._max_request_length = tokenizer.model_max_length
+
+    @property
+    def max_sequence_length(self) -> int:
+        """Return the max sequence length of this tokenizer."""
+        return self._max_request_length
+
+    @property
+    def max_request_length(self) -> int:
+        """Return the max request length of this tokenizer."""
+        return self.max_sequence_length
+
+    @property
+    def end_of_text_token(self) -> str:
+        """The end of text token."""
+        return self._end_of_text_token
+
+    @property
+    def tokenizer_name(self) -> str:
+        """Name of the tokenizer to use when sending a request."""
+        return self._tokenizer_name
+
+    @property
+    def prefix_token(self) -> str:
+        """The prefix token."""
+        return self._prefix_token
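The new HuggingFaceWindowService derives all of its window parameters from the underlying Hugging Face tokenizer rather than hard-coding them per model. A minimal sketch of the idea, using the transformers library directly instead of HELM's HuggingFaceTokenizers cache (the "gpt2" model name is only an example):

# Sketch: the attributes HuggingFaceWindowService reads (bos_token, eos_token,
# model_max_length) come straight from a Hugging Face tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
print(tokenizer.bos_token)         # used as the prefix token
print(tokenizer.eos_token)         # used as the end-of-text token
print(tokenizer.model_max_length)  # used as both max_sequence_length and max_request_length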
helm/benchmark/window_services/test_flan_t5_window_service.py
ADDED
@@ -0,0 +1,12 @@
+import tempfile
+
+from helm.benchmark.window_services.test_t511b_window_service import TestT511bWindowService
+from helm.benchmark.window_services.window_service_factory import TokenizerService, WindowServiceFactory
+from helm.benchmark.window_services.test_utils import get_tokenizer_service
+
+
+class TestFlanT5WindowService(TestT511bWindowService):
+    def setup_method(self):
+        self.path: str = tempfile.mkdtemp()
+        service: TokenizerService = get_tokenizer_service(self.path)
+        self.window_service = WindowServiceFactory.get_window_service("together/flan-t5-xxl", service)
helm/benchmark/window_services/wider_ai21_window_service.py
ADDED
@@ -0,0 +1,13 @@
+from .ai21_window_service import AI21WindowService
+
+
+class WiderAI21WindowService(AI21WindowService):
+    @property
+    def max_sequence_length(self) -> int:
+        """
+        Return the max sequence length of the larger AI21 Jurassic-2 models.
+
+        The AI21 server automatically prepends a token to every prompt,
+        so the actual max sequence length is 8192 - 1 = 8191.
+        """
+        return 8191
helm/benchmark/window_services/window_service_factory.py
CHANGED
@@ -1,7 +1,14 @@
-from helm.proxy.models import
+from helm.proxy.models import (
+    get_model,
+    get_model_names_with_tag,
+    Model,
+    AI21_WIDER_CONTEXT_WINDOW_TAG,
+    WIDER_CONTEXT_WINDOW_TAG,
+)
 from .ai21_window_service import AI21WindowService
+from .wider_ai21_window_service import WiderAI21WindowService
 from .anthropic_window_service import AnthropicWindowService
-from .cohere_window_service import CohereWindowService
+from .cohere_window_service import CohereWindowService, CohereCommandWindowService
 from .luminous_window_service import (
     LuminousBaseWindowService,
     LuminousExtendedWindowService,
@@ -12,6 +19,7 @@ from .openai_window_service import OpenAIWindowService
 from .wider_openai_window_service import WiderOpenAIWindowService
 from .mt_nlg_window_service import MTNLGWindowService
 from .bloom_window_service import BloomWindowService
+from .huggingface_window_service import HuggingFaceWindowService
 from .ice_window_service import ICEWindowService
 from .santacoder_window_service import SantaCoderWindowService
 from .gpt2_window_service import GPT2WindowService
@@ -20,10 +28,12 @@ from .gptneox_window_service import GPTNeoXWindowService
 from .opt_window_service import OPTWindowService
 from .t0pp_window_service import T0ppWindowService
 from .t511b_window_service import T511bWindowService
+from .flan_t5_window_service import FlanT5WindowService
 from .ul2_window_service import UL2WindowService
 from .yalm_window_service import YaLMWindowService
 from .window_service import WindowService
 from .tokenizer_service import TokenizerService
+from helm.proxy.clients.huggingface_client import get_huggingface_model_config
 
 
 class WindowServiceFactory:
@@ -38,7 +48,10 @@ class WindowServiceFactory:
         engine: str = model.engine
 
         window_service: WindowService
-
+        huggingface_model_config = get_huggingface_model_config(model_name)
+        if huggingface_model_config:
+            window_service = HuggingFaceWindowService(service=service, model_config=huggingface_model_config)
+        elif model_name in get_model_names_with_tag(WIDER_CONTEXT_WINDOW_TAG):
             window_service = WiderOpenAIWindowService(service)
         # For the Google models, we approximate with the OpenAIWindowService
         elif organization == "openai" or organization == "simple" or organization == "google":
@@ -70,7 +83,7 @@ class WindowServiceFactory:
             window_service = ICEWindowService(service)
         elif model_name in ["huggingface/gpt-j-6b", "together/gpt-j-6b", "gooseai/gpt-j-6b"]:
             window_service = GPTJWindowService(service)
-        elif model_name in ["together/gpt-neox-20b", "gooseai/gpt-neo-20b"]:
+        elif model_name in ["together/gpt-neox-20b", "gooseai/gpt-neo-20b", "together/gpt-neoxt-chat-base-20b"]:
             window_service = GPTNeoXWindowService(service)
         elif model_name == "together/h3-2.7b":
             window_service = GPT2WindowService(service)
@@ -80,14 +93,22 @@ class WindowServiceFactory:
             window_service = T0ppWindowService(service)
         elif model_name == "together/t5-11b":
             window_service = T511bWindowService(service)
+        elif model_name == "together/flan-t5-xxl":
+            window_service = FlanT5WindowService(service)
         elif model_name == "together/ul2":
             window_service = UL2WindowService(service)
         elif model_name == "together/yalm":
             window_service = YaLMWindowService(service)
         elif organization == "cohere":
-
+            if "command" in engine:
+                window_service = CohereCommandWindowService(service)
+            else:
+                window_service = CohereWindowService(service)
         elif organization == "ai21":
-
+            if model_name in get_model_names_with_tag(AI21_WIDER_CONTEXT_WINDOW_TAG):
+                window_service = WiderAI21WindowService(service=service, gpt2_window_service=GPT2WindowService(service))
+            else:
+                window_service = AI21WindowService(service=service, gpt2_window_service=GPT2WindowService(service))
         else:
             raise ValueError(f"Unhandled model name: {model_name}")
 
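The factory dispatch above can be exercised the same way the new test file does. A hedged sketch only (the temporary directory and model names are illustrative, and it assumes the corresponding models are registered in helm/proxy/models.py):

# Sketch mirroring test_flan_t5_window_service.py; shows that "command" engines now
# resolve to CohereCommandWindowService (max_request_length == 2020) and that
# together/flan-t5-xxl resolves to FlanT5WindowService (max_sequence_length == 511).
import tempfile

from helm.benchmark.window_services.test_utils import get_tokenizer_service
from helm.benchmark.window_services.window_service_factory import WindowServiceFactory

service = get_tokenizer_service(tempfile.mkdtemp())
flan_t5 = WindowServiceFactory.get_window_service("together/flan-t5-xxl", service)
command = WindowServiceFactory.get_window_service("cohere/command-xlarge-beta", service)
print(flan_t5.max_sequence_length)  # 511, i.e. 512 - 1 for <extra_id_0>
print(command.max_request_length)   # 2020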
helm/common/general.py
CHANGED
@@ -49,7 +49,13 @@ def shell(args: List[str]):
 
 
 @htrack(None)
-def ensure_file_downloaded(
+def ensure_file_downloaded(
+    source_url: str,
+    target_path: str,
+    unpack: bool = False,
+    downloader_executable: str = "wget",
+    unpack_type: Optional[str] = None,
+):
     """Download `source_url` to `target_path` if it doesn't exist."""
     if os.path.exists(target_path):
         # Assume it's all good
@@ -59,7 +65,8 @@ def ensure_file_downloaded(source_url: str, target_path: str, unpack: bool = Fal
     # Download
     # gdown is used to download large files/zip folders from Google Drive.
     # It bypasses security warnings which wget cannot handle.
-
+    if source_url.startswith("https://drive.google.com"):
+        downloader_executable = "gdown"
     tmp_path: str = f"{target_path}.tmp"
     shell([downloader_executable, source_url, "-O", tmp_path])
 
@@ -195,13 +202,13 @@ def parallel_map(
     with htrack_block(f"Parallelizing computation on {len(items)} items over {parallelism} {units}"):
         results: List
         if parallelism == 1:
-            results = list(tqdm(map(process, items), total=len(items)))
+            results = list(tqdm(map(process, items), total=len(items), disable=None))
         elif multiprocessing:
             with ProcessPoolExecutor(max_workers=parallelism) as executor:
-                results = list(tqdm(executor.map(process, items), total=len(items)))
+                results = list(tqdm(executor.map(process, items), total=len(items), disable=None))
         else:
             with ThreadPoolExecutor(max_workers=parallelism) as executor:
-                results = list(tqdm(executor.map(process, items), total=len(items)))
+                results = list(tqdm(executor.map(process, items), total=len(items), disable=None))
     return results
 
 
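The `disable=None` argument added to the tqdm calls above is tqdm's documented way of auto-disabling the progress bar when output is not attached to a TTY, which keeps redirected benchmark logs free of progress-bar noise. A minimal sketch:

# Sketch: with disable=None, tqdm shows a bar on an interactive terminal but stays
# silent when stdout/stderr is redirected to a file.
from tqdm import tqdm

for _ in tqdm(range(1000), disable=None):
    pass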
helm/proxy/clients/aleph_alpha_client.py
CHANGED
@@ -2,7 +2,11 @@ import json
 import requests
 from typing import Any, Dict, List
 
+from aleph_alpha_client import Client as AlephAlphaPythonClient
+from tokenizers import Tokenizer, Encoding
+
 from helm.common.cache import Cache, CacheConfig
+from helm.common.hierarchical_logger import hlog
 from helm.common.request import Request, RequestResult, Sequence, Token
 from helm.common.tokenization_request import (
     DecodeRequest,
@@ -19,9 +23,27 @@ class AlephAlphaClient(Client):
     TOKENIZE_ENDPOINT: str = "tokenize"
     DETOKENIZE_ENDPOINT: str = "detokenize"
 
+    VALID_TOKENIZERS: List[str] = [
+        "luminous-base",
+        "luminous-extended",
+        "luminous-supreme",
+    ]
+
     def __init__(self, api_key: str, cache_config: CacheConfig):
         self.api_key: str = api_key
         self.cache = Cache(cache_config)
+        self._aleph_alpha_client = AlephAlphaPythonClient(token=api_key)
+        self._tokenizer_name_to_tokenizer: Dict[str, Tokenizer] = {}
+
+    def _get_tokenizer(self, tokenizer_name: str) -> Tokenizer:
+        if tokenizer_name not in self.VALID_TOKENIZERS:
+            raise ValueError(f"Invalid tokenizer: {tokenizer_name}")
+
+        # Check if the tokenizer is cached
+        if tokenizer_name not in self._tokenizer_name_to_tokenizer:
+            self._tokenizer_name_to_tokenizer[tokenizer_name] = self._aleph_alpha_client.tokenizer(tokenizer_name)
+            hlog(f"Initialized tokenizer: {tokenizer_name}")
+        return self._tokenizer_name_to_tokenizer[tokenizer_name]
 
     def _send_request(self, endpoint: str, raw_request: Dict[str, Any]) -> Dict[str, Any]:
         response = requests.request(
@@ -33,6 +55,8 @@ class AlephAlphaClient(Client):
                 "Authorization": f"Bearer {self.api_key}",
             },
             data=json.dumps(raw_request),
+            # Setting the nice flag prevents intensive benchmarking runs from saturating Aleph Alpha's API queues
+            params=json.dumps({"nice": True}),
         )
         result = json.loads(response.text)
         assert "error" not in result, f"Request failed with error: {result['error']}"
@@ -40,7 +64,6 @@ class AlephAlphaClient(Client):
 
     def make_request(self, request: Request) -> RequestResult:
         """Make a request following https://docs.aleph-alpha.com/api/complete."""
-        # TODO: echo is not supported. Follow up on this.
         raw_request = {
             "model": request.model_engine,
             "prompt": request.prompt,
@@ -53,6 +76,7 @@ class AlephAlphaClient(Client):
             "n": request.num_completions,
             "stop_sequences": request.stop_sequences,
             "log_probs": request.top_k_per_token,
+            "echo": request.echo_prompt,
             "tokens": True,  # Setting to True returns individual tokens of the completion
         }
 
@@ -102,24 +126,21 @@ class AlephAlphaClient(Client):
         )
 
     def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
-        """
-
-
-
-            "tokens": True,
-            "token_ids": True,
-        }
-
+        """
+        Encode the text using Aleph Alpha's tokenizer library:
+        https://aleph-alpha-client.readthedocs.io/en/latest/aleph_alpha_client.html#aleph_alpha_client.Client.tokenizer
+        """
         try:
 
             def do_it():
-
-
-                return result
-
-
-
-
+                tokenizer: Tokenizer = self._get_tokenizer(request.tokenizer_name)
+                result: Encoding = tokenizer.encode(request.text, add_special_tokens=False)
+                return {"token_ids": result.ids, "tokens": result.tokens}
+
+            cache_key = {"model": request.tokenizer_name, "prompt": request.text, "tokens": True, "token_ids": True}
+            response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+        except RuntimeError as e:
+            error: str = f"AlephAlphaClient tokenize error: {e}"
             return TokenizationRequestResult(error=error, success=False, cached=False, text="", tokens=[])
 
         tokens = response["token_ids" if request.encode else "tokens"]
@@ -135,22 +156,20 @@ class AlephAlphaClient(Client):
         )
 
     def decode(self, request: DecodeRequest) -> DecodeRequestResult:
-        """
-
-
-
-        }
-
+        """
+        Decode the tokens using Aleph Alpha's tokenizer library:
+        https://aleph-alpha-client.readthedocs.io/en/latest/aleph_alpha_client.html#aleph_alpha_client.Client.tokenizer
+        """
         try:
 
            def do_it():
-
-
-                return result
+                tokenizer: Tokenizer = self._get_tokenizer(request.tokenizer_name)
+                return {"result": tokenizer.decode(request.tokens)}
 
-
-
-
+            cache_key = {"model": request.tokenizer_name, "token_ids": request.tokens}
+            response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+        except RuntimeError as e:
+            error: str = f"AlephAlphaClient decode error: {e}"
             return DecodeRequestResult(error=error, success=False, cached=False, text="")
 
         return DecodeRequestResult(