crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the differences between two publicly released versions of the package as they appear in their supported public registries. The information is provided for informational purposes only.
Files changed (546)
  1. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/METADATA +144 -36
  2. crfm_helm-0.5.0.dist-info/RECORD +642 -0
  3. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +37 -2
  5. helm/benchmark/adaptation/adapters/adapter.py +4 -42
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/binary_ranking_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/generation_adapter.py +2 -0
  9. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +21 -4
  10. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +12 -5
  11. helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py +1 -0
  12. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  13. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  14. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +5 -1
  15. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +1 -0
  16. helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py +1 -0
  17. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  18. helm/benchmark/adaptation/adapters/test_generation_adapter.py +59 -14
  19. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +40 -5
  20. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +78 -10
  21. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  22. helm/benchmark/adaptation/prompt.py +7 -1
  23. helm/benchmark/adaptation/request_state.py +6 -1
  24. helm/benchmark/adaptation/scenario_state.py +6 -2
  25. helm/benchmark/annotation/annotator.py +43 -0
  26. helm/benchmark/annotation/annotator_factory.py +61 -0
  27. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  28. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  29. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  30. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  31. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  32. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  33. helm/benchmark/annotation_executor.py +124 -0
  34. helm/benchmark/augmentations/cleva_perturbation.py +7 -14
  35. helm/benchmark/augmentations/contraction_expansion_perturbation.py +3 -3
  36. helm/benchmark/augmentations/contrast_sets_perturbation.py +0 -3
  37. helm/benchmark/augmentations/data_augmenter.py +0 -2
  38. helm/benchmark/augmentations/dialect_perturbation.py +2 -2
  39. helm/benchmark/augmentations/extra_space_perturbation.py +2 -2
  40. helm/benchmark/augmentations/filler_words_perturbation.py +2 -2
  41. helm/benchmark/augmentations/gender_perturbation.py +3 -3
  42. helm/benchmark/augmentations/lowercase_perturbation.py +2 -2
  43. helm/benchmark/augmentations/mild_mix_perturbation.py +2 -2
  44. helm/benchmark/augmentations/misspelling_perturbation.py +2 -2
  45. helm/benchmark/augmentations/person_name_perturbation.py +0 -7
  46. helm/benchmark/augmentations/perturbation.py +20 -7
  47. helm/benchmark/augmentations/perturbation_description.py +1 -1
  48. helm/benchmark/augmentations/space_perturbation.py +2 -2
  49. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  50. helm/benchmark/augmentations/synonym_perturbation.py +2 -2
  51. helm/benchmark/augmentations/test_perturbation.py +11 -7
  52. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  53. helm/benchmark/augmentations/typos_perturbation.py +2 -2
  54. helm/benchmark/config_registry.py +38 -0
  55. helm/benchmark/executor.py +46 -16
  56. helm/benchmark/huggingface_registration.py +37 -7
  57. helm/benchmark/metrics/basic_metrics.py +172 -641
  58. helm/benchmark/metrics/bbq_metrics.py +3 -4
  59. helm/benchmark/metrics/bias_metrics.py +6 -6
  60. helm/benchmark/metrics/classification_metrics.py +11 -8
  61. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  62. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  63. helm/benchmark/metrics/code_metrics.py +4 -3
  64. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  65. helm/benchmark/metrics/common_metric_specs.py +167 -0
  66. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  67. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  68. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  69. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  70. helm/benchmark/metrics/disinformation_metrics.py +6 -112
  71. helm/benchmark/metrics/dry_run_metrics.py +5 -3
  72. helm/benchmark/metrics/efficiency_metrics.py +206 -0
  73. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  74. helm/benchmark/metrics/evaluate_reference_metrics.py +376 -0
  75. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  76. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  77. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  78. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  79. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  80. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  81. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  82. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  83. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  84. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  85. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  86. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  87. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  88. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  89. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  90. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  91. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  92. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  93. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  94. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  95. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  96. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  97. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  98. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  99. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  100. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  101. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  102. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  103. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  104. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  105. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  106. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  107. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  108. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  109. helm/benchmark/metrics/machine_translation_metrics.py +5 -5
  110. helm/benchmark/metrics/metric.py +93 -172
  111. helm/benchmark/metrics/metric_name.py +0 -1
  112. helm/benchmark/metrics/metric_service.py +16 -0
  113. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  114. helm/benchmark/metrics/ranking_metrics.py +6 -7
  115. helm/benchmark/metrics/reference_metric.py +148 -0
  116. helm/benchmark/metrics/summac/model_summac.py +0 -2
  117. helm/benchmark/metrics/summarization_metrics.py +8 -8
  118. helm/benchmark/metrics/test_classification_metrics.py +9 -6
  119. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  120. helm/benchmark/metrics/test_evaluate_reference_metrics.py +30 -0
  121. helm/benchmark/metrics/test_metric.py +2 -2
  122. helm/benchmark/metrics/tokens/auto_token_cost_estimator.py +1 -1
  123. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +13 -3
  124. helm/benchmark/metrics/tokens/openai_token_cost_estimator.py +1 -1
  125. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -0
  126. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +9 -2
  127. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  128. helm/benchmark/metrics/toxicity_utils.py +23 -0
  129. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  130. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  131. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  132. helm/benchmark/metrics/vision_language/image_metrics.py +450 -0
  133. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  134. helm/benchmark/model_deployment_registry.py +164 -41
  135. helm/benchmark/model_metadata_registry.py +181 -35
  136. helm/benchmark/multi_gpu_runner.py +133 -0
  137. helm/benchmark/presentation/contamination.py +3 -3
  138. helm/benchmark/presentation/create_plots.py +8 -7
  139. helm/benchmark/presentation/run_display.py +50 -17
  140. helm/benchmark/presentation/schema.py +28 -46
  141. helm/benchmark/presentation/summarize.py +213 -96
  142. helm/benchmark/presentation/table.py +8 -8
  143. helm/benchmark/presentation/test_contamination.py +2 -2
  144. helm/benchmark/presentation/test_run_entry.py +14 -9
  145. helm/benchmark/presentation/test_summarize.py +5 -0
  146. helm/benchmark/run.py +66 -54
  147. helm/benchmark/run_expander.py +342 -31
  148. helm/benchmark/run_spec.py +93 -0
  149. helm/benchmark/run_spec_factory.py +162 -0
  150. helm/benchmark/run_specs/__init__.py +0 -0
  151. helm/benchmark/{run_specs.py → run_specs/classic_run_specs.py} +217 -1330
  152. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  153. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  154. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  155. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  156. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  157. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  158. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  159. helm/benchmark/run_specs/vlm_run_specs.py +501 -0
  160. helm/benchmark/runner.py +116 -69
  161. helm/benchmark/runner_config_registry.py +21 -0
  162. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  163. helm/benchmark/scenarios/bold_scenario.py +2 -2
  164. helm/benchmark/scenarios/cleva_scenario.py +43 -46
  165. helm/benchmark/scenarios/code_scenario.py +3 -2
  166. helm/benchmark/scenarios/commonsense_scenario.py +171 -191
  167. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  168. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  169. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  170. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  171. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  172. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  173. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  174. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  175. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  176. helm/benchmark/scenarios/entity_matching_scenario.py +1 -1
  177. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  178. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  179. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  180. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  181. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  182. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  183. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  184. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  185. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  186. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  187. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  188. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  189. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  190. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  191. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  192. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  193. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  194. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  195. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  196. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  197. helm/benchmark/scenarios/legalbench_scenario.py +123 -0
  198. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  199. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  200. helm/benchmark/scenarios/lsat_qa_scenario.py +4 -2
  201. helm/benchmark/scenarios/math_scenario.py +19 -2
  202. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  203. helm/benchmark/scenarios/numeracy_scenario.py +3 -3
  204. helm/benchmark/scenarios/opinions_qa_scenario.py +6 -10
  205. helm/benchmark/scenarios/raft_scenario.py +2 -6
  206. helm/benchmark/scenarios/scenario.py +14 -2
  207. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  208. helm/benchmark/scenarios/test_math_scenario.py +22 -0
  209. helm/benchmark/scenarios/test_scenario.py +6 -3
  210. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  211. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  212. helm/benchmark/scenarios/the_pile_scenario.py +6 -7
  213. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  214. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  215. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  216. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  217. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +92 -0
  218. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  219. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  220. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  221. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  222. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  223. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  224. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  225. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  226. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  227. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  228. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  229. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  230. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  231. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  232. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  233. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  234. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  235. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  236. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  237. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +107 -0
  238. helm/benchmark/scenarios/vision_language/vqa_scenario.py +1 -1
  239. helm/benchmark/scenarios/wmt_14_scenario.py +18 -18
  240. helm/benchmark/server.py +59 -2
  241. helm/benchmark/slurm_jobs.py +12 -0
  242. helm/benchmark/slurm_runner.py +79 -51
  243. helm/benchmark/static/benchmarking.js +3 -4
  244. helm/benchmark/static/contamination.yaml +1 -1
  245. helm/benchmark/static/images/organizations/together.png +0 -0
  246. helm/benchmark/static/json-urls.js +4 -0
  247. helm/benchmark/static/{schema.yaml → schema_classic.yaml} +346 -930
  248. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  249. helm/benchmark/static/schema_lite.yaml +824 -0
  250. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  251. helm/benchmark/static/schema_unitxt.yaml +428 -0
  252. helm/benchmark/static/schema_vlm.yaml +576 -0
  253. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  254. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  255. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  256. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  257. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  258. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  259. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  260. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  261. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  262. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  263. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  264. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  265. helm/benchmark/static_build/assets/index-5088afcb.css +1 -0
  266. helm/benchmark/static_build/assets/index-d839df55.js +9 -0
  267. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  268. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  269. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  270. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  271. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  272. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  273. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  274. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  275. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  276. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  277. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  278. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  279. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  280. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  281. helm/benchmark/static_build/config.js +4 -0
  282. helm/benchmark/static_build/index.html +20 -0
  283. helm/benchmark/test_data_preprocessor.py +3 -3
  284. helm/benchmark/test_model_deployment_definition.py +90 -0
  285. helm/benchmark/test_run_expander.py +1 -1
  286. helm/benchmark/tokenizer_config_registry.py +10 -14
  287. helm/benchmark/window_services/ai21_window_service.py +22 -33
  288. helm/benchmark/window_services/cohere_window_service.py +1 -63
  289. helm/benchmark/window_services/default_window_service.py +2 -35
  290. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  291. helm/benchmark/window_services/ice_window_service.py +0 -34
  292. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  293. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  294. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  295. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  296. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  297. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  298. helm/benchmark/window_services/local_window_service.py +21 -4
  299. helm/benchmark/window_services/no_decoding_window_service.py +32 -0
  300. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  301. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  302. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  303. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  304. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  305. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  306. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  307. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  308. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  309. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  310. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  311. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  312. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  313. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  314. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  315. helm/benchmark/window_services/test_utils.py +3 -2
  316. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  317. helm/benchmark/window_services/window_service.py +42 -0
  318. helm/benchmark/window_services/window_service_factory.py +24 -269
  319. helm/benchmark/window_services/yalm_window_service.py +0 -27
  320. helm/clients/__init__.py +0 -0
  321. helm/{proxy/clients → clients}/ai21_client.py +5 -12
  322. helm/clients/aleph_alpha_client.py +112 -0
  323. helm/{proxy/clients → clients}/anthropic_client.py +213 -24
  324. helm/clients/auto_client.py +215 -0
  325. helm/clients/bedrock_client.py +128 -0
  326. helm/clients/bedrock_utils.py +72 -0
  327. helm/{proxy/clients → clients}/client.py +67 -55
  328. helm/clients/clip_score_client.py +49 -0
  329. helm/clients/clip_scorers/__init__.py +0 -0
  330. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  331. helm/clients/clip_scorers/clip_scorer.py +50 -0
  332. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  333. helm/{proxy/clients → clients}/cohere_client.py +6 -17
  334. helm/clients/gcs_client.py +82 -0
  335. helm/{proxy/clients → clients}/google_client.py +7 -8
  336. helm/clients/google_translate_client.py +35 -0
  337. helm/{proxy/clients → clients}/http_model_client.py +6 -10
  338. helm/{proxy/clients → clients}/huggingface_client.py +134 -92
  339. helm/clients/image_generation/__init__.py +0 -0
  340. helm/clients/image_generation/adobe_vision_client.py +78 -0
  341. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  342. helm/clients/image_generation/cogview2/__init__.py +0 -0
  343. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  344. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  345. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  346. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  347. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  348. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  349. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  350. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  351. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  352. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  353. helm/clients/image_generation/cogview2_client.py +191 -0
  354. helm/clients/image_generation/dalle2_client.py +192 -0
  355. helm/clients/image_generation/dalle3_client.py +108 -0
  356. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  357. helm/clients/image_generation/dalle_mini/data.py +442 -0
  358. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  359. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  360. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  361. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  362. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  363. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  364. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  365. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  366. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  367. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  368. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  369. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  370. helm/clients/image_generation/dalle_mini_client.py +190 -0
  371. helm/clients/image_generation/deep_floyd_client.py +78 -0
  372. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  373. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  374. helm/clients/image_generation/lexica_client.py +86 -0
  375. helm/clients/image_generation/mindalle/__init__.py +0 -0
  376. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  377. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  378. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  379. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  380. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  381. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  382. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  383. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  384. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  385. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  386. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  387. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  388. helm/clients/image_generation/mindalle_client.py +115 -0
  389. helm/clients/image_generation/nudity_check_client.py +64 -0
  390. helm/clients/image_generation/together_image_generation_client.py +111 -0
  391. helm/{proxy/clients → clients}/lit_gpt_client.py +7 -5
  392. helm/{proxy/clients → clients}/megatron_client.py +13 -7
  393. helm/clients/mistral_client.py +134 -0
  394. helm/clients/moderation_api_client.py +109 -0
  395. helm/clients/open_lm_client.py +43 -0
  396. helm/clients/openai_client.py +302 -0
  397. helm/{proxy/clients → clients}/palmyra_client.py +15 -12
  398. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  399. helm/clients/simple_client.py +64 -0
  400. helm/{proxy/clients → clients}/test_auto_client.py +15 -15
  401. helm/clients/test_client.py +100 -0
  402. helm/clients/test_huggingface_client.py +70 -0
  403. helm/clients/test_simple_client.py +19 -0
  404. helm/{proxy/clients → clients}/test_together_client.py +23 -12
  405. helm/{proxy/clients → clients}/together_client.py +18 -71
  406. helm/clients/vertexai_client.py +391 -0
  407. helm/clients/vision_language/__init__.py +0 -0
  408. helm/clients/vision_language/huggingface_vlm_client.py +104 -0
  409. helm/{proxy/clients → clients}/vision_language/idefics_client.py +59 -52
  410. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  411. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  412. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  413. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  414. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  415. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  416. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  417. helm/clients/vision_language/open_flamingo_client.py +155 -0
  418. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  419. helm/clients/vllm_client.py +46 -0
  420. helm/common/cache.py +24 -179
  421. helm/common/cache_backend_config.py +47 -0
  422. helm/common/clip_score_request.py +41 -0
  423. helm/common/concurrency.py +32 -0
  424. helm/common/credentials_utils.py +28 -0
  425. helm/common/file_caches/__init__.py +0 -0
  426. helm/common/file_caches/file_cache.py +16 -0
  427. helm/common/file_caches/local_file_cache.py +61 -0
  428. helm/common/file_caches/test_local_file_cache.py +25 -0
  429. helm/common/file_upload_request.py +27 -0
  430. helm/common/general.py +29 -10
  431. helm/common/image_generation_parameters.py +25 -0
  432. helm/common/images_utils.py +24 -1
  433. helm/common/key_value_store.py +113 -0
  434. helm/common/media_object.py +13 -0
  435. helm/common/moderations_api_request.py +71 -0
  436. helm/common/mongo_key_value_store.py +88 -0
  437. helm/common/multimodal_request_utils.py +31 -0
  438. helm/common/nudity_check_request.py +29 -0
  439. helm/common/object_spec.py +2 -2
  440. helm/common/request.py +36 -27
  441. helm/common/test_general.py +6 -0
  442. helm/common/tokenization_request.py +6 -3
  443. helm/config/__init__.py +0 -0
  444. helm/config/model_deployments.yaml +1942 -0
  445. helm/config/model_metadata.yaml +2201 -0
  446. helm/config/tokenizer_configs.yaml +362 -0
  447. helm/proxy/accounts.py +31 -4
  448. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  449. helm/proxy/critique/model_critique_client.py +13 -5
  450. helm/proxy/example_queries.py +29 -17
  451. helm/proxy/retry.py +8 -2
  452. helm/proxy/server.py +77 -5
  453. helm/proxy/services/remote_service.py +31 -0
  454. helm/proxy/services/server_service.py +103 -20
  455. helm/proxy/services/service.py +34 -2
  456. helm/proxy/services/test_remote_service.py +7 -6
  457. helm/proxy/services/test_service.py +27 -18
  458. helm/proxy/test_accounts.py +32 -0
  459. helm/proxy/token_counters/auto_token_counter.py +37 -37
  460. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  461. helm/proxy/token_counters/token_counter.py +3 -5
  462. helm/py.typed +0 -0
  463. helm/tokenizers/__init__.py +0 -0
  464. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  465. helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +3 -1
  466. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +17 -11
  467. helm/tokenizers/auto_tokenizer.py +93 -0
  468. helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +8 -2
  469. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  470. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  471. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +56 -60
  472. helm/tokenizers/simple_tokenizer.py +33 -0
  473. helm/tokenizers/test_anthropic_tokenizer.py +82 -0
  474. helm/tokenizers/test_huggingface_tokenizer.py +136 -0
  475. helm/tokenizers/test_simple_tokenizer.py +33 -0
  476. helm/tokenizers/vertexai_tokenizer.py +97 -0
  477. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  478. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  479. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  480. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  481. crfm_helm-0.3.0.dist-info/RECORD +0 -396
  482. helm/benchmark/vlm_run_specs.py +0 -71
  483. helm/benchmark/window_services/anthropic_window_service.py +0 -68
  484. helm/benchmark/window_services/bloom_window_service.py +0 -35
  485. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  486. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  487. helm/benchmark/window_services/gptj_window_service.py +0 -38
  488. helm/benchmark/window_services/gptneox_window_service.py +0 -41
  489. helm/benchmark/window_services/http_model_window_service.py +0 -28
  490. helm/benchmark/window_services/huggingface_window_service.py +0 -59
  491. helm/benchmark/window_services/lit_gpt_window_service.py +0 -27
  492. helm/benchmark/window_services/llama_window_service.py +0 -28
  493. helm/benchmark/window_services/luminous_window_service.py +0 -67
  494. helm/benchmark/window_services/megatron_window_service.py +0 -10
  495. helm/benchmark/window_services/mt_nlg_window_service.py +0 -27
  496. helm/benchmark/window_services/openai_window_service.py +0 -13
  497. helm/benchmark/window_services/opt_window_service.py +0 -35
  498. helm/benchmark/window_services/palmyra_window_service.py +0 -45
  499. helm/benchmark/window_services/remote_window_service.py +0 -48
  500. helm/benchmark/window_services/santacoder_window_service.py +0 -27
  501. helm/benchmark/window_services/starcoder_window_service.py +0 -27
  502. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  503. helm/benchmark/window_services/t511b_window_service.py +0 -30
  504. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  505. helm/benchmark/window_services/ul2_window_service.py +0 -30
  506. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  507. helm/benchmark/window_services/wider_openai_window_service.py +0 -52
  508. helm/proxy/clients/aleph_alpha_client.py +0 -99
  509. helm/proxy/clients/auto_client.py +0 -461
  510. helm/proxy/clients/goose_ai_client.py +0 -100
  511. helm/proxy/clients/microsoft_client.py +0 -182
  512. helm/proxy/clients/openai_client.py +0 -206
  513. helm/proxy/clients/remote_model_registry.py +0 -28
  514. helm/proxy/clients/simple_client.py +0 -61
  515. helm/proxy/clients/test_anthropic_client.py +0 -63
  516. helm/proxy/clients/test_client.py +0 -31
  517. helm/proxy/clients/test_huggingface_client.py +0 -87
  518. helm/proxy/models.py +0 -963
  519. helm/proxy/test_models.py +0 -27
  520. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  521. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  522. helm/proxy/token_counters/free_token_counter.py +0 -12
  523. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  524. helm/proxy/token_counters/openai_token_counter.py +0 -22
  525. helm/proxy/token_counters/test_ai21_token_counter.py +0 -86
  526. helm/proxy/token_counters/test_openai_token_counter.py +0 -79
  527. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  528. helm/proxy/tokenizers/test_huggingface_tokenizer.py +0 -56
  529. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/LICENSE +0 -0
  530. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/entry_points.txt +0 -0
  531. {crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/top_level.txt +0 -0
  532. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  533. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  534. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  535. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  536. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  537. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  538. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  539. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  540. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  541. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  542. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  543. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  544. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  545. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  546. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -1,14 +1,14 @@
1
1
  # mypy: check_untyped_defs = False
2
2
  import threading
3
3
  from dataclasses import asdict
4
- from typing import List, Dict, Optional
4
+ from typing import Any, List, Dict, Optional
5
5
 
6
6
  from dacite import from_dict
7
7
  from googleapiclient import discovery
8
8
  from googleapiclient.errors import BatchError, HttpError
9
9
  from googleapiclient.http import BatchHttpRequest
10
10
  from httplib2 import HttpLib2Error
11
- from helm.proxy.clients.toxicity_classifier_client import ToxicityClassifierClient
11
+ from helm.clients.toxicity_classifier_client import ToxicityClassifierClient
12
12
  from helm.proxy.retry import NonRetriableException
13
13
 
14
14
  from helm.common.cache import Cache, CacheConfig
@@ -91,14 +91,9 @@ class PerspectiveAPIClient(ToxicityClassifierClient):
91
91
  Batch several requests into a single API request and get the toxicity attributes and scores.
92
92
  For more information, see https://googleapis.github.io/google-api-python-client/docs/batch.html.
93
93
  """
94
-
95
- with self._client_lock:
96
- if not self._client:
97
- self._client = self._create_client()
98
-
99
94
  try:
100
95
 
101
- def do_it():
96
+ def do_it() -> Dict[str, Any]:
102
97
  text_to_response: Dict[str, Dict] = dict()
103
98
 
104
99
  def callback(request_id: str, response: Dict, error: HttpError):
@@ -106,6 +101,10 @@ class PerspectiveAPIClient(ToxicityClassifierClient):
106
101
  raise error
107
102
  text_to_response[request_id] = response
108
103
 
104
+ with self._client_lock:
105
+ if not self._client:
106
+ self._client = self._create_client()
107
+
109
108
  # Create a batch request. We will add a request to the batch request for each text string
110
109
  batch_request: BatchHttpRequest = self._client.new_batch_http_request()
111
110
 
@@ -0,0 +1,64 @@
1
+ import itertools
2
+ from typing import List, TypedDict
3
+ from typing import Dict, Any
4
+
5
+ from helm.common.cache import CacheConfig
6
+ from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
7
+ from helm.clients.client import CachingClient
8
+
9
+
10
+ class SimpleClientRequest(TypedDict):
11
+ engine: str
12
+ prompt: str
13
+ num_completions: int
14
+
15
+
16
+ class SimpleClient(CachingClient):
17
+ """Simple client for tutorials and for debugging."""
18
+
19
+ def __init__(self, cache_config: CacheConfig):
20
+ super().__init__(cache_config=cache_config)
21
+
22
+ def make_request(self, request: Request) -> RequestResult:
23
+ raw_request: SimpleClientRequest = {
24
+ "engine": request.model_engine,
25
+ "prompt": request.prompt,
26
+ "num_completions": request.num_completions,
27
+ }
28
+
29
+ def do_it() -> Dict[str, Any]:
30
+ return self.invoke_model(raw_request)
31
+
32
+ cache_key = CachingClient.make_cache_key(raw_request, request)
33
+ response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
34
+ logprob = 0
35
+ completions = [
36
+ GeneratedOutput(
37
+ text=text,
38
+ logprob=logprob,
39
+ tokens=[Token(text=text, logprob=logprob)],
40
+ )
41
+ for text in response["completions"]
42
+ ]
43
+
44
+ return RequestResult(
45
+ success=True,
46
+ cached=cached,
47
+ request_time=response["request_time"],
48
+ request_datetime=response.get("request_datetime"),
49
+ completions=completions,
50
+ embedding=[],
51
+ )
52
+
53
+ def invoke_model(self, raw_request: SimpleClientRequest) -> Dict[str, Any]:
54
+ """
55
+ Example:
56
+ Prompt: 7 2 4 6
57
+ Completions (num_completions = 3):
58
+ - 6
59
+ - 4
60
+ - 2
61
+ """
62
+ prompt_words: List[str] = raw_request["prompt"].split()
63
+ completions = list(itertools.islice(itertools.cycle(reversed(prompt_words)), raw_request["num_completions"]))
64
+ return {"completions": completions}
@@ -1,12 +1,13 @@
1
1
  import dataclasses
2
2
  from tempfile import TemporaryDirectory
3
- from helm.common.request import Sequence, Token
3
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
4
+ from helm.common.request import GeneratedOutput, Token
4
5
 
5
6
  import pytest
6
7
 
7
8
  from helm.common.request import Request, RequestResult
8
9
  from helm.common.general import get_credentials
9
- from helm.proxy.clients.auto_client import AutoClient
10
+ from helm.clients.auto_client import AutoClient
10
11
 
11
12
 
12
13
  @pytest.mark.models
@@ -15,8 +16,8 @@ class TestAutoClient:
15
16
  credentials = get_credentials()
16
17
  if not credentials:
17
18
  pytest.skip("Skipping test because no credentials found")
18
- with TemporaryDirectory() as cache_path:
19
- auto_client = AutoClient(credentials, cache_path)
19
+ with TemporaryDirectory() as temp_dir_path:
20
+ auto_client = AutoClient(credentials, temp_dir_path, BlackHoleCacheBackendConfig())
20
21
  actual_result = auto_client.make_request(request)
21
22
  assert actual_result.request_time or actual_result.batch_request_time
22
23
  actual_result = dataclasses.replace(
@@ -27,6 +28,7 @@ class TestAutoClient:
27
28
  def test_make_request_databricks(self):
28
29
  request = Request(
29
30
  model="databricks/dolly-v2-3b",
31
+ model_deployment="together/dolly-v2-3b",
30
32
  prompt="Elephants are one of the most",
31
33
  temperature=0.0,
32
34
  max_tokens=10,
@@ -35,32 +37,29 @@ class TestAutoClient:
35
37
  success=True,
36
38
  embedding=[],
37
39
  completions=[
38
- Sequence(
40
+ GeneratedOutput(
39
41
  text=" intelligent species on the planet. They are also one",
40
42
  logprob=-9.087313510477543,
41
43
  tokens=[
42
44
  Token(
43
45
  text="Ġintelligent",
44
46
  logprob=-1.9816237688064575,
45
- top_logprobs={"Ġintelligent": -1.9816237688064575},
46
47
  ),
47
48
  Token(
48
49
  text="Ġspecies",
49
50
  logprob=-1.2881066799163818,
50
- top_logprobs={"Ġspecies": -1.2881066799163818},
51
51
  ),
52
- Token(text="Ġon", logprob=-0.16092979907989502, top_logprobs={"Ġon": -0.16092979907989502}),
53
- Token(text="Ġthe", logprob=-0.23620447516441345, top_logprobs={"Ġthe": -0.23620447516441345}),
52
+ Token(text="Ġon", logprob=-0.16092979907989502),
53
+ Token(text="Ġthe", logprob=-0.23620447516441345),
54
54
  Token(
55
55
  text="Ġplanet",
56
56
  logprob=-0.015416033565998077,
57
- top_logprobs={"Ġplanet": -0.015416033565998077},
58
57
  ),
59
- Token(text=".", logprob=-0.6683081388473511, top_logprobs={".": -0.6683081388473511}),
60
- Token(text="ĠThey", logprob=-1.9231040477752686, top_logprobs={"ĠThey": -1.9231040477752686}),
61
- Token(text="Ġare", logprob=-0.9322243332862854, top_logprobs={"Ġare": -0.9322243332862854}),
62
- Token(text="Ġalso", logprob=-0.7750787138938904, top_logprobs={"Ġalso": -0.7750787138938904}),
63
- Token(text="Ġone", logprob=-1.1063175201416016, top_logprobs={"Ġone": -1.1063175201416016}),
58
+ Token(text=".", logprob=-0.6683081388473511),
59
+ Token(text="ĠThey", logprob=-1.9231040477752686),
60
+ Token(text="Ġare", logprob=-0.9322243332862854),
61
+ Token(text="Ġalso", logprob=-0.7750787138938904),
62
+ Token(text="Ġone", logprob=-1.1063175201416016),
64
63
  ],
65
64
  finish_reason={"reason": "length"},
66
65
  )
@@ -69,6 +68,7 @@ class TestAutoClient:
69
68
  )
70
69
  request = Request(
71
70
  model="databricks/dolly-v2-3b",
71
+ model_deployment="together/dolly-v2-3b",
72
72
  prompt="Elephants are one of the most",
73
73
  temperature=0.0,
74
74
  max_tokens=10,
@@ -0,0 +1,100 @@
1
+ from helm.common.cache import BlackHoleCacheConfig
2
+ from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
3
+ from .client import truncate_sequence, truncate_and_tokenize_response_text
4
+ from typing import List
5
+ from helm.common.request import Request, GeneratedOutput, Token
6
+
7
+
8
+ def truncate_sequence_helper(tokens: List[str], request: Request, expected_tokens: List[str]):
9
+ sequence = GeneratedOutput(
10
+ text="".join(tokens),
11
+ tokens=[Token(text=text, logprob=-1) for text in tokens],
12
+ logprob=-len(tokens),
13
+ )
14
+
15
+ output_sequence = truncate_sequence(sequence, request)
16
+
17
+ assert expected_tokens == [token.text for token in output_sequence.tokens]
18
+ assert "".join(expected_tokens) == output_sequence.text
19
+ assert output_sequence.logprob == sum(token.logprob for token in output_sequence.tokens)
20
+
21
+
22
+ def test_truncate_sequence():
23
+ # echo_prompt = True, nothing gets truncated
24
+ truncate_sequence_helper(
25
+ ["a", "b", "c"],
26
+ Request(
27
+ model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", prompt="abc", echo_prompt=True
28
+ ),
29
+ ["a", "b", "c"],
30
+ )
31
+
32
+ # Nothing gets truncated
33
+ truncate_sequence_helper(
34
+ ["hello", " world"],
35
+ Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", stop_sequences=["#"]),
36
+ ["hello", " world"],
37
+ )
38
+
39
+ # Truncate using stop sequences
40
+ truncate_sequence_helper(
41
+ ["hello", " world", "\n", "what"],
42
+ Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", stop_sequences=["\n"]),
43
+ ["hello", " world"],
44
+ )
45
+
46
+ # Truncate using max tokens
47
+ truncate_sequence_helper(
48
+ ["a", "b", "c"],
49
+ Request(model="openai/text-davinci-002", model_deployment="openai/text-davinci-002", max_tokens=2),
50
+ ["a", "b"],
51
+ )
52
+
53
+
54
+ def test_truncate_and_tokenize_response_text():
55
+ tokenizer = HuggingFaceTokenizer(BlackHoleCacheConfig())
56
+ tokenizer_name = "huggingface/gpt2"
57
+
58
+ # No truncation
59
+ response = truncate_and_tokenize_response_text(
60
+ "I am a scientist. I am a scientist.", Request(max_tokens=100, stop_sequences=[]), tokenizer, tokenizer_name
61
+ )
62
+ assert response.finish_reason
63
+ assert response.finish_reason["reason"] == "endoftext"
64
+ assert response.text == "I am a scientist. I am a scientist."
65
+ assert response.tokens == [
66
+ Token("I", 0.0),
67
+ Token(" am", 0.0),
68
+ Token(" a", 0.0),
69
+ Token(" scientist", 0.0),
70
+ Token(".", 0.0),
71
+ Token(" I", 0.0),
72
+ Token(" am", 0.0),
73
+ Token(" a", 0.0),
74
+ Token(" scientist", 0.0),
75
+ Token(".", 0.0),
76
+ ]
77
+
78
+ response = truncate_and_tokenize_response_text(
79
+ "I am a scientist. I am a scientist.", Request(max_tokens=7, stop_sequences=["."]), tokenizer, tokenizer_name
80
+ )
81
+ assert response.finish_reason
82
+ assert response.finish_reason["reason"] == "stop"
83
+ assert response.text == "I am a scientist"
84
+ assert response.tokens == [Token("I", 0.0), Token(" am", 0.0), Token(" a", 0.0), Token(" scientist", 0.0)]
85
+
86
+ response = truncate_and_tokenize_response_text(
87
+ "I am a scientist. I am a scientist.", Request(max_tokens=3, stop_sequences=[]), tokenizer, tokenizer_name
88
+ )
89
+ assert response.finish_reason
90
+ assert response.finish_reason["reason"] == "length"
91
+ assert response.text == "I am a"
92
+ assert response.tokens == [Token("I", 0.0), Token(" am", 0.0), Token(" a", 0.0)]
93
+
94
+ response = truncate_and_tokenize_response_text(
95
+ "I am a scientist. I am a scientist.", Request(max_tokens=3, stop_sequences=["."]), tokenizer, tokenizer_name
96
+ )
97
+ assert response.finish_reason
98
+ assert response.finish_reason["reason"] == "length"
99
+ assert response.text == "I am a"
100
+ assert response.tokens == [Token("I", 0.0), Token(" am", 0.0), Token(" a", 0.0)]
@@ -0,0 +1,70 @@
1
+ import pytest
2
+
3
+ from helm.common.cache import BlackHoleCacheConfig
4
+ from helm.common.request import Request, RequestResult
5
+ from helm.clients.huggingface_client import HuggingFaceClient
6
+
7
+
8
+ class TestHuggingFaceClient:
9
+ def test_gpt2(self):
10
+ client = HuggingFaceClient(
11
+ cache_config=BlackHoleCacheConfig(), pretrained_model_name_or_path="openai-community/gpt2"
12
+ )
13
+ prompt: str = "I am a computer scientist."
14
+ result: RequestResult = client.make_request(
15
+ Request(
16
+ model="openai/gpt2",
17
+ model_deployment="huggingface/gpt2",
18
+ prompt=prompt,
19
+ num_completions=3,
20
+ top_k_per_token=5,
21
+ max_tokens=1,
22
+ echo_prompt=True,
23
+ )
24
+ )
25
+ assert len(result.completions) == 3
26
+ assert result.completions[0].text.startswith(
27
+ prompt
28
+ ), "echo_prompt was set to true. Expected the prompt at the beginning of each completion"
29
+
30
+ @pytest.mark.skip(reason="GPT-J 6B is 22 GB and extremely slow without a GPU.")
31
+ def test_gptj_6b(self):
32
+ client = HuggingFaceClient(
33
+ cache_config=BlackHoleCacheConfig(), pretrained_model_name_or_path="openai-community/gpt2"
34
+ )
35
+ result: RequestResult = client.make_request(
36
+ Request(
37
+ model="eleutherai/gpt-j-6b",
38
+ model_deployment="huggingface/gpt-j-6b",
39
+ prompt="I am a computer scientist.",
40
+ num_completions=3,
41
+ top_k_per_token=5,
42
+ max_tokens=0,
43
+ )
44
+ )
45
+ assert len(result.completions) == 3
46
+
47
+ def test_logprob(self):
48
+ client = HuggingFaceClient(
49
+ cache_config=BlackHoleCacheConfig(), pretrained_model_name_or_path="openai-community/gpt2"
50
+ )
51
+ prompt: str = "I am a computer scientist."
52
+ result: RequestResult = client.make_request(
53
+ Request(
54
+ model="openai/gpt2",
55
+ model_deployment="huggingface/gpt2",
56
+ prompt=prompt,
57
+ num_completions=1,
58
+ max_tokens=0,
59
+ echo_prompt=True,
60
+ )
61
+ )
62
+ assert result.completions[0].text.startswith(
63
+ prompt
64
+ ), "echo_prompt was set to true. Expected the prompt at the beginning of each completion"
65
+ total_logprob: float = 0
66
+ assert len(result.completions[0].tokens) == 6, "Expected 6 tokens in the completion"
67
+ for token in result.completions[0].tokens[1:]:
68
+ assert token.logprob != 0
69
+ total_logprob += token.logprob
70
+ assert result.completions[0].logprob == pytest.approx(total_logprob)
@@ -0,0 +1,19 @@
1
+ from helm.clients.simple_client import SimpleClient
2
+ from helm.common.cache import BlackHoleCacheConfig
3
+ from helm.common.request import GeneratedOutput, Request, Token
4
+
5
+
6
+ def test_simple_client_make_request():
7
+ client = SimpleClient(BlackHoleCacheConfig())
8
+ request = Request(
9
+ model="simple/model1",
10
+ model_deployment="simple/model1",
11
+ prompt="Elephants are one of the most",
12
+ temperature=0.0,
13
+ max_tokens=10,
14
+ )
15
+ result = client.make_request(request)
16
+ assert result.success
17
+ assert not result.cached
18
+ assert result.embedding == []
19
+ assert result.completions == [GeneratedOutput(text="most", logprob=0, tokens=[Token(text="most", logprob=0)])]
@@ -4,7 +4,6 @@ import tempfile
4
4
 
5
5
  from helm.common.cache import SqliteCacheConfig
6
6
  from helm.common.request import Request
7
- from helm.proxy.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
8
7
 
9
8
  from .together_client import TogetherClient, TogetherClientError
10
9
 
@@ -13,25 +12,22 @@ class TestTogetherClient:
13
12
  def setup_method(self, method):
14
13
  cache_file = tempfile.NamedTemporaryFile(delete=False)
15
14
  self.cache_path: str = cache_file.name
16
- self.client = TogetherClient(
17
- tokenizer=HuggingFaceTokenizer(SqliteCacheConfig(self.cache_path)),
18
- cache_config=SqliteCacheConfig(self.cache_path),
19
- )
20
15
 
21
16
  def teardown_method(self, method):
22
17
  os.remove(self.cache_path)
23
18
 
24
19
  @pytest.mark.parametrize(
25
- "test_input,expected",
20
+ "together_model,test_input,expected",
26
21
  [
27
22
  (
23
+ "togethercomputer/RedPajama-INCITE-Base-3B-v1",
28
24
  Request(
29
25
  model="together/redpajama-incite-base-3b-v1",
26
+ model_deployment="together/redpajama-incite-base-3b-v1",
30
27
  ),
31
28
  {
32
29
  "best_of": 1,
33
30
  "echo": False,
34
- "logprobs": 1,
35
31
  "max_tokens": 100,
36
32
  "model": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
37
33
  "n": 1,
@@ -43,8 +39,10 @@ class TestTogetherClient:
43
39
  },
44
40
  ),
45
41
  (
42
+ "huggyllama/llama-7b",
46
43
  Request(
47
44
  model="meta/llama-7b",
45
+ model_deployment="together/llama-7b",
48
46
  prompt="I am a computer scientist.",
49
47
  temperature=0,
50
48
  num_completions=4,
@@ -57,7 +55,6 @@ class TestTogetherClient:
57
55
  {
58
56
  "best_of": 3,
59
57
  "echo": True,
60
- "logprobs": 3,
61
58
  "max_tokens": 24,
62
59
  "model": "huggyllama/llama-7b",
63
60
  "n": 4,
@@ -69,14 +66,15 @@ class TestTogetherClient:
69
66
  },
70
67
  ),
71
68
  (
69
+ "togethercomputer/alpaca-7b",
72
70
  Request(
73
71
  model="stanford/alpaca-7b",
72
+ model_deployment="together/alpaca-7b",
74
73
  stop_sequences=["\n"],
75
74
  ),
76
75
  {
77
76
  "best_of": 1,
78
77
  "echo": False,
79
- "logprobs": 1,
80
78
  "max_tokens": 100,
81
79
  "model": "togethercomputer/alpaca-7b",
82
80
  "n": 1,
@@ -90,9 +88,22 @@ class TestTogetherClient:
90
88
  # TODO(#1828): Add test for `SET_DETAILS_TO_TRUE` after Together supports it.
91
89
  ],
92
90
  )
93
- def test_convert_to_raw_request(self, test_input, expected):
94
- assert expected == TogetherClient.convert_to_raw_request(test_input)
91
+ def test_convert_to_raw_request(self, together_model, test_input, expected):
92
+ client = TogetherClient(
93
+ cache_config=SqliteCacheConfig(self.cache_path),
94
+ together_model=together_model,
95
+ )
96
+ assert expected == client.convert_to_raw_request(test_input)
95
97
 
96
98
  def test_api_key_error(self):
99
+ client = TogetherClient(
100
+ cache_config=SqliteCacheConfig(self.cache_path),
101
+ together_model="togethercomputer/RedPajama-INCITE-Base-3B-v1",
102
+ )
97
103
  with pytest.raises(TogetherClientError):
98
- self.client.make_request(Request(model="together/bloom"))
104
+ client.make_request(
105
+ Request(
106
+ model="together/redpajama-incite-base-3b-v1",
107
+ model_deployment="together/redpajama-incite-base-3b-v1",
108
+ )
109
+ )
@@ -5,63 +5,10 @@ import requests
5
5
  from retrying import retry
6
6
 
7
7
  from helm.common.cache import CacheConfig
8
- from helm.common.request import wrap_request_time, Request, RequestResult, Sequence, Token
9
- from helm.proxy.tokenizers.tokenizer import Tokenizer
8
+ from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
10
9
  from .client import CachingClient, truncate_sequence, cleanup_str
11
10
 
12
11
 
13
- MODEL_ALIASES: Dict[str, str] = {
14
- # Legacy models
15
- "flan-t5-xxl": "flan-t5-xxl-hf",
16
- "h3-2.7b": "h3-2.7b-h3",
17
- "opt-1.3b": "opt-1.3b-ft-tp1",
18
- "opt-6.7b": "opt-6.7b-ft-tp1",
19
- # Production models
20
- "redpajama-incite-base-3b-v1": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
21
- "redpajama-incite-instruct-3b-v1": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
22
- "redpajama-incite-base-7b": "togethercomputer/RedPajama-INCITE-7B-Base",
23
- "redpajama-incite-instruct-7b": "togethercomputer/RedPajama-INCITE-7B-Instruct",
24
- "alpaca-7b": "togethercomputer/alpaca-7b",
25
- "dolly-v2-3b": "databricks/dolly-v2-3b",
26
- "dolly-v2-7b": "databricks/dolly-v2-7b",
27
- "dolly-v2-12b": "databricks/dolly-v2-12b",
28
- "falcon-7b": "togethercomputer/falcon-7b",
29
- "falcon-7b-instruct": "togethercomputer/falcon-7b-instruct",
30
- "falcon-40b": "togethercomputer/falcon-40b",
31
- "falcon-40b-instruct": "togethercomputer/falcon-40b-instruct",
32
- "llama-7b": "huggyllama/llama-7b",
33
- "llama-13b": "huggyllama/llama-13b",
34
- "llama-30b": "huggyllama/llama-30b",
35
- "llama-65b": "huggyllama/llama-65b",
36
- "llama-2-7b": "togethercomputer/llama-2-7b",
37
- "llama-2-13b": "togethercomputer/llama-2-13b",
38
- "llama-2-70b": "togethercomputer/llama-2-70b",
39
- "mistral-7b-v0.1": "mistralai/Mistral-7B-v0.1",
40
- "mpt-7b": "togethercomputer/mpt-7b",
41
- "mpt-instruct-7b": "togethercomputer/mpt-7b-instruct",
42
- "mpt-30b": "togethercomputer/mpt-30b",
43
- "mpt-instruct-30b": "togethercomputer/mpt-30b-instruct",
44
- "pythia-1b-v0": "EleutherAI/pythia-1b-v0",
45
- "pythia-2.8b-v0": "EleutherAI/pythia-2.8b-v0",
46
- "pythia-6.9b": "EleutherAI/pythia-6.9b",
47
- "pythia-12b-v0": "EleutherAI/pythia-12b-v0",
48
- "stablelm-base-alpha-3b": "stabilityai/stablelm-base-alpha-3b",
49
- "stablelm-base-alpha-7b": "stabilityai/stablelm-base-alpha-7b",
50
- "vicuna-7b-v1.3": "lmsys/vicuna-7b-v1.3",
51
- "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
52
- }
53
- """Together model name aliases.
54
-
55
- HELM users use a shorter model name (e.g. together/flan-t5-xxl)
56
- whereas the Together client sends and caches requests using
57
- a longer model name that is suffixed with the implementation framework
58
- (e.g. flan-t5-xxl-hf). This allows trackcing exactly which
59
- implementation was used in the cached results, since some results may
60
- be different depending on the implementation (e.g. efficiency metrics).
61
- This also allows future migration of results in the case of changes of
62
- available implementations on Together."""
63
-
64
-
65
12
  class _RewriteRequestTags:
66
13
  """Tags that indicate that the request for the model must be rewritten before sending to Together."""
67
14
 
@@ -105,6 +52,10 @@ The keys are the model engine of the HELM model name (e.g. "alpaca-7b"), not the
105
52
  (e.g. "stanford/alpaca-7b") or the Together model name (e.g. "togethercomputer/alpaca-7b")."""
106
53
 
107
54
 
55
+ TOGETHER_SUPPORTS_ASYNC_REQUESTS = False
56
+ """Whether Together AI currently supports asynchronous requests."""
57
+
58
+
108
59
  def _rewrite_raw_request_for_model_tags(raw_request: Dict[str, Any], model_engine: str) -> Dict[str, Any]:
109
60
  """Rewrite the raw request given the model."""
110
61
  # Make a deepcopy to avoid mutating the input in unexpected ways
@@ -146,43 +97,41 @@ class TogetherClient(CachingClient):
146
97
  INFERENCE_ENDPOINT: str = "https://api.together.xyz/api/inference"
147
98
  RETRIEVE_JOB_MAX_WAIT_SECONDS: int = 60
148
99
 
149
- @staticmethod
150
- def convert_to_raw_request(request: Request) -> Dict:
100
+ def convert_to_raw_request(self, request: Request) -> Dict:
151
101
  # Following the examples from https://github.com/togethercomputer/open-models-api
152
102
  raw_request = {
153
103
  "request_type": "language-model-inference",
154
- "model": MODEL_ALIASES.get(request.model_engine, request.model_engine),
104
+ "model": self.together_model or request.model,
155
105
  "prompt": request.prompt,
156
106
  "temperature": request.temperature,
157
107
  "n": request.num_completions,
158
108
  "max_tokens": request.max_tokens,
159
109
  "best_of": request.top_k_per_token,
160
- "logprobs": request.top_k_per_token,
161
110
  "stop": request.stop_sequences or None,
162
111
  "echo": request.echo_prompt,
163
112
  "top_p": request.top_p,
164
113
  }
165
114
  return _rewrite_raw_request_for_model_tags(raw_request, request.model_engine)
166
115
 
167
- def __init__(self, tokenizer: Tokenizer, cache_config: CacheConfig, api_key: Optional[str] = None):
168
- super().__init__(cache_config=cache_config, tokenizer=tokenizer)
116
+ def __init__(self, cache_config: CacheConfig, together_model: Optional[str] = None, api_key: Optional[str] = None):
117
+ super().__init__(cache_config=cache_config)
169
118
  # TODO: the endpoint currently doesn't require an API key. When an API key is not specified
170
119
  # in credentials.conf, we rely on offline evaluation only.
171
120
  self.api_key: Optional[str] = api_key
121
+ self.together_model = together_model
172
122
 
173
123
  def _get_job_url(self, job_id: str) -> str:
174
124
  return f"https://api.together.xyz/jobs/job/{job_id}"
175
125
 
176
126
  def make_request(self, request: Request) -> RequestResult:
177
- raw_request = TogetherClient.convert_to_raw_request(request)
178
- cache_key: Dict = CachingClient.make_cache_key(raw_request, request)
127
+ raw_request = self.convert_to_raw_request(request)
128
+ cache_key = CachingClient.make_cache_key(raw_request, request)
179
129
 
180
130
  if not self.api_key:
181
131
  raise TogetherClientError("togetherApiKey not set in credentials.conf")
182
132
  headers: Dict[str, str] = {"Authorization": f"Bearer {self.api_key}"}
183
133
 
184
- # TODO: Remove synchronous branch.
185
- if request.model_engine in MODEL_ALIASES:
134
+ if TOGETHER_SUPPORTS_ASYNC_REQUESTS:
186
135
 
187
136
  def submit_job() -> str:
188
137
  submit_request = {**raw_request, "async": True}
@@ -271,7 +220,7 @@ class TogetherClient(CachingClient):
271
220
  )
272
221
 
273
222
  # Expect the result to be structured the same way as a response from OpenAI API.
274
- completions: List[Sequence] = []
223
+ completions: List[GeneratedOutput] = []
275
224
  for raw_completion in response["choices"]:
276
225
  sequence_logprob = 0
277
226
  tokens: List[Token] = []
@@ -281,22 +230,20 @@ class TogetherClient(CachingClient):
281
230
  # Waiting for a fix.
282
231
  if "logprobs" in raw_completion:
283
232
  raw_data = raw_completion["logprobs"]
284
- for text, logprob, top_logprobs in zip(
285
- raw_data["tokens"], raw_data["token_logprobs"], raw_data["top_logprobs"]
286
- ):
233
+ for text, logprob in zip(raw_data["tokens"], raw_data["token_logprobs"]):
287
234
  # TODO #1654: Check if this is still needed
288
235
  text = cleanup_str(text, "together")
289
- tokens.append(Token(text=text, logprob=logprob or 0, top_logprobs=dict(top_logprobs or {})))
236
+ tokens.append(Token(text=text, logprob=logprob or 0))
290
237
  sequence_logprob += logprob or 0
291
238
  else:
292
239
  # hack: just make the entire text one token so that something shows up in the frontend
293
240
  text = cleanup_str(raw_completion["text"], "together")
294
- tokens.append(Token(text=text, logprob=0, top_logprobs={}))
241
+ tokens.append(Token(text=text, logprob=0))
295
242
 
296
243
  raw_finish_reason: Optional[str] = raw_completion.get("finish_reason")
297
244
  finish_reason: Optional[Dict] = {"reason": raw_finish_reason} if raw_finish_reason else None
298
245
 
299
- completion = Sequence(
246
+ completion = GeneratedOutput(
300
247
  text=cleanup_str(raw_completion["text"], "together"),
301
248
  logprob=sequence_logprob,
302
249
  tokens=tokens,