crfm-helm 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (499):
  1. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/METADATA +138 -31
  2. crfm_helm-0.5.1.dist-info/RECORD +654 -0
  3. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapter_spec.py +31 -3
  5. helm/benchmark/adaptation/adapters/adapter.py +2 -2
  6. helm/benchmark/adaptation/adapters/adapter_factory.py +24 -27
  7. helm/benchmark/adaptation/adapters/generation_adapter.py +1 -0
  8. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -4
  9. helm/benchmark/adaptation/adapters/language_modeling_adapter.py +2 -3
  10. helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
  11. helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
  12. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +104 -0
  13. helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py +2 -1
  14. helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
  15. helm/benchmark/adaptation/adapters/test_adapter.py +2 -1
  16. helm/benchmark/adaptation/adapters/test_generation_adapter.py +32 -8
  17. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +7 -19
  18. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +60 -6
  19. helm/benchmark/adaptation/common_adapter_specs.py +376 -0
  20. helm/benchmark/adaptation/request_state.py +6 -1
  21. helm/benchmark/adaptation/scenario_state.py +6 -2
  22. helm/benchmark/annotation/annotator.py +43 -0
  23. helm/benchmark/annotation/annotator_factory.py +61 -0
  24. helm/benchmark/annotation/image2structure/image_compiler_annotator.py +88 -0
  25. helm/benchmark/annotation/image2structure/latex_compiler_annotator.py +59 -0
  26. helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +84 -0
  27. helm/benchmark/annotation/image2structure/webpage_compiler_annotator.py +132 -0
  28. helm/benchmark/annotation/test_annotator_factory.py +26 -0
  29. helm/benchmark/annotation/test_dummy_annotator.py +44 -0
  30. helm/benchmark/annotation_executor.py +124 -0
  31. helm/benchmark/augmentations/data_augmenter.py +0 -2
  32. helm/benchmark/augmentations/gender_perturbation.py +1 -1
  33. helm/benchmark/augmentations/perturbation.py +25 -3
  34. helm/benchmark/augmentations/perturbation_description.py +1 -1
  35. helm/benchmark/augmentations/suffix_perturbation.py +29 -0
  36. helm/benchmark/augmentations/test_perturbation.py +41 -7
  37. helm/benchmark/augmentations/translate_perturbation.py +30 -0
  38. helm/benchmark/config_registry.py +7 -1
  39. helm/benchmark/executor.py +46 -16
  40. helm/benchmark/huggingface_registration.py +20 -7
  41. helm/benchmark/metrics/basic_metrics.py +169 -664
  42. helm/benchmark/metrics/bbq_metrics.py +3 -4
  43. helm/benchmark/metrics/bias_metrics.py +6 -6
  44. helm/benchmark/metrics/classification_metrics.py +11 -8
  45. helm/benchmark/metrics/cleva_accuracy_metrics.py +8 -5
  46. helm/benchmark/metrics/cleva_harms_metrics.py +2 -2
  47. helm/benchmark/metrics/code_metrics_helper.py +0 -2
  48. helm/benchmark/metrics/common_metric_specs.py +167 -0
  49. helm/benchmark/metrics/decodingtrust_fairness_metrics.py +72 -0
  50. helm/benchmark/metrics/decodingtrust_ood_knowledge_metrics.py +66 -0
  51. helm/benchmark/metrics/decodingtrust_privacy_metrics.py +101 -0
  52. helm/benchmark/metrics/decodingtrust_stereotype_bias_metrics.py +202 -0
  53. helm/benchmark/metrics/disinformation_metrics.py +4 -110
  54. helm/benchmark/metrics/dry_run_metrics.py +2 -2
  55. helm/benchmark/metrics/efficiency_metrics.py +213 -0
  56. helm/benchmark/metrics/evaluate_instances_metric.py +59 -0
  57. helm/benchmark/metrics/evaluate_reference_metrics.py +392 -0
  58. helm/benchmark/metrics/image_generation/aesthetics_metrics.py +54 -0
  59. helm/benchmark/metrics/image_generation/aesthetics_scorer.py +66 -0
  60. helm/benchmark/metrics/image_generation/clip_score_metrics.py +73 -0
  61. helm/benchmark/metrics/image_generation/denoised_runtime_metric.py +42 -0
  62. helm/benchmark/metrics/image_generation/detection_metrics.py +57 -0
  63. helm/benchmark/metrics/image_generation/detectors/base_detector.py +8 -0
  64. helm/benchmark/metrics/image_generation/detectors/vitdet.py +178 -0
  65. helm/benchmark/metrics/image_generation/efficiency_metrics.py +41 -0
  66. helm/benchmark/metrics/image_generation/fidelity_metrics.py +168 -0
  67. helm/benchmark/metrics/image_generation/fractal_dimension/__init__.py +0 -0
  68. helm/benchmark/metrics/image_generation/fractal_dimension/fractal_dimension_util.py +63 -0
  69. helm/benchmark/metrics/image_generation/fractal_dimension/test_fractal_dimension_util.py +33 -0
  70. helm/benchmark/metrics/image_generation/fractal_dimension_metric.py +50 -0
  71. helm/benchmark/metrics/image_generation/gender_metrics.py +58 -0
  72. helm/benchmark/metrics/image_generation/image_critique_metrics.py +284 -0
  73. helm/benchmark/metrics/image_generation/lpips_metrics.py +82 -0
  74. helm/benchmark/metrics/image_generation/multi_scale_ssim_metrics.py +82 -0
  75. helm/benchmark/metrics/image_generation/nsfw_detector.py +96 -0
  76. helm/benchmark/metrics/image_generation/nsfw_metrics.py +103 -0
  77. helm/benchmark/metrics/image_generation/nudity_metrics.py +38 -0
  78. helm/benchmark/metrics/image_generation/photorealism_critique_metrics.py +153 -0
  79. helm/benchmark/metrics/image_generation/psnr_metrics.py +78 -0
  80. helm/benchmark/metrics/image_generation/q16/__init__.py +0 -0
  81. helm/benchmark/metrics/image_generation/q16/q16_toxicity_detector.py +90 -0
  82. helm/benchmark/metrics/image_generation/q16/test_q16.py +18 -0
  83. helm/benchmark/metrics/image_generation/q16_toxicity_metrics.py +48 -0
  84. helm/benchmark/metrics/image_generation/skin_tone_metrics.py +164 -0
  85. helm/benchmark/metrics/image_generation/uiqi_metrics.py +92 -0
  86. helm/benchmark/metrics/image_generation/watermark/__init__.py +0 -0
  87. helm/benchmark/metrics/image_generation/watermark/test_watermark_detector.py +16 -0
  88. helm/benchmark/metrics/image_generation/watermark/watermark_detector.py +87 -0
  89. helm/benchmark/metrics/image_generation/watermark_metrics.py +48 -0
  90. helm/benchmark/metrics/instruction_following_critique_metrics.py +3 -1
  91. helm/benchmark/metrics/language_modeling_metrics.py +99 -0
  92. helm/benchmark/metrics/machine_translation_metrics.py +89 -0
  93. helm/benchmark/metrics/metric.py +93 -172
  94. helm/benchmark/metrics/metric_name.py +0 -1
  95. helm/benchmark/metrics/metric_service.py +16 -0
  96. helm/benchmark/metrics/paraphrase_generation_metrics.py +3 -4
  97. helm/benchmark/metrics/ranking_metrics.py +2 -2
  98. helm/benchmark/metrics/reference_metric.py +148 -0
  99. helm/benchmark/metrics/summac/model_summac.py +0 -2
  100. helm/benchmark/metrics/summarization_metrics.py +2 -2
  101. helm/benchmark/metrics/test_classification_metrics.py +8 -5
  102. helm/benchmark/metrics/test_disinformation_metrics.py +78 -0
  103. helm/benchmark/metrics/{test_basic_metrics.py → test_evaluate_reference_metrics.py} +5 -1
  104. helm/benchmark/metrics/test_metric.py +2 -2
  105. helm/benchmark/metrics/tokens/gooseai_token_cost_estimator.py +10 -2
  106. helm/benchmark/metrics/toxicity_metrics.py +1 -1
  107. helm/benchmark/metrics/toxicity_utils.py +23 -0
  108. helm/benchmark/metrics/unitxt_metrics.py +81 -0
  109. helm/benchmark/metrics/vision_language/__init__.py +0 -0
  110. helm/benchmark/metrics/vision_language/emd_utils.py +341 -0
  111. helm/benchmark/metrics/vision_language/image_metrics.py +575 -0
  112. helm/benchmark/metrics/vision_language/image_utils.py +100 -0
  113. helm/benchmark/model_deployment_registry.py +74 -0
  114. helm/benchmark/model_metadata_registry.py +41 -1
  115. helm/benchmark/multi_gpu_runner.py +133 -0
  116. helm/benchmark/presentation/create_plots.py +8 -7
  117. helm/benchmark/presentation/run_display.py +26 -10
  118. helm/benchmark/presentation/schema.py +15 -40
  119. helm/benchmark/presentation/summarize.py +119 -79
  120. helm/benchmark/presentation/table.py +8 -8
  121. helm/benchmark/presentation/test_contamination.py +2 -2
  122. helm/benchmark/presentation/test_run_entry.py +1 -2
  123. helm/benchmark/presentation/test_summarize.py +3 -3
  124. helm/benchmark/run.py +54 -26
  125. helm/benchmark/run_expander.py +205 -35
  126. helm/benchmark/run_spec.py +93 -0
  127. helm/benchmark/run_spec_factory.py +163 -0
  128. helm/benchmark/run_specs/__init__.py +0 -0
  129. helm/benchmark/run_specs/classic_run_specs.py +1510 -0
  130. helm/benchmark/run_specs/cleva_run_specs.py +277 -0
  131. helm/benchmark/run_specs/decodingtrust_run_specs.py +314 -0
  132. helm/benchmark/run_specs/heim_run_specs.py +623 -0
  133. helm/benchmark/run_specs/instruction_following_run_specs.py +129 -0
  134. helm/benchmark/run_specs/lite_run_specs.py +307 -0
  135. helm/benchmark/run_specs/simple_run_specs.py +104 -0
  136. helm/benchmark/run_specs/unitxt_run_specs.py +42 -0
  137. helm/benchmark/run_specs/vlm_run_specs.py +757 -0
  138. helm/benchmark/runner.py +51 -57
  139. helm/benchmark/runner_config_registry.py +21 -0
  140. helm/benchmark/scenarios/bbq_scenario.py +1 -1
  141. helm/benchmark/scenarios/bold_scenario.py +2 -2
  142. helm/benchmark/scenarios/code_scenario.py +1 -0
  143. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +169 -0
  144. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +121 -0
  145. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +77 -0
  146. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +324 -0
  147. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +204 -0
  148. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +559 -0
  149. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +67 -0
  150. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +78 -0
  151. helm/benchmark/scenarios/dialogue_scenarios.py +0 -1
  152. helm/benchmark/scenarios/image_generation/__init__.py +0 -0
  153. helm/benchmark/scenarios/image_generation/common_syntactic_processes_scenario.py +105 -0
  154. helm/benchmark/scenarios/image_generation/cub200_scenario.py +95 -0
  155. helm/benchmark/scenarios/image_generation/daily_dalle_scenario.py +124 -0
  156. helm/benchmark/scenarios/image_generation/demographic_stereotypes_scenario.py +82 -0
  157. helm/benchmark/scenarios/image_generation/detection_scenario.py +83 -0
  158. helm/benchmark/scenarios/image_generation/draw_bench_scenario.py +74 -0
  159. helm/benchmark/scenarios/image_generation/i2p_scenario.py +57 -0
  160. helm/benchmark/scenarios/image_generation/landing_page_scenario.py +46 -0
  161. helm/benchmark/scenarios/image_generation/logos_scenario.py +223 -0
  162. helm/benchmark/scenarios/image_generation/magazine_cover_scenario.py +91 -0
  163. helm/benchmark/scenarios/image_generation/mental_disorders_scenario.py +46 -0
  164. helm/benchmark/scenarios/image_generation/mscoco_scenario.py +91 -0
  165. helm/benchmark/scenarios/image_generation/paint_skills_scenario.py +72 -0
  166. helm/benchmark/scenarios/image_generation/parti_prompts_scenario.py +94 -0
  167. helm/benchmark/scenarios/image_generation/radiology_scenario.py +42 -0
  168. helm/benchmark/scenarios/image_generation/relational_understanding_scenario.py +52 -0
  169. helm/benchmark/scenarios/image_generation/time_most_significant_historical_figures_scenario.py +124 -0
  170. helm/benchmark/scenarios/image_generation/winoground_scenario.py +62 -0
  171. helm/benchmark/scenarios/imdb_scenario.py +0 -1
  172. helm/benchmark/scenarios/legalbench_scenario.py +6 -2
  173. helm/benchmark/scenarios/live_qa_scenario.py +94 -0
  174. helm/benchmark/scenarios/lm_entry_scenario.py +185 -0
  175. helm/benchmark/scenarios/math_scenario.py +19 -2
  176. helm/benchmark/scenarios/medication_qa_scenario.py +60 -0
  177. helm/benchmark/scenarios/numeracy_scenario.py +1 -1
  178. helm/benchmark/scenarios/opinions_qa_scenario.py +0 -4
  179. helm/benchmark/scenarios/scenario.py +4 -0
  180. helm/benchmark/scenarios/simple_scenarios.py +122 -1
  181. helm/benchmark/scenarios/test_math_scenario.py +6 -0
  182. helm/benchmark/scenarios/test_scenario.py +6 -3
  183. helm/benchmark/scenarios/test_simple_scenarios.py +50 -0
  184. helm/benchmark/scenarios/thai_exam_scenario.py +135 -0
  185. helm/benchmark/scenarios/unitxt_scenario.py +56 -0
  186. helm/benchmark/scenarios/verifiability_judgment_scenario.py +3 -1
  187. helm/benchmark/scenarios/vicuna_scenario.py +1 -1
  188. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
  189. helm/benchmark/scenarios/vision_language/bingo_scenario.py +103 -0
  190. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
  191. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
  192. helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
  193. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +94 -0
  194. helm/benchmark/scenarios/vision_language/heim_human_eval_scenario.py +113 -0
  195. helm/benchmark/scenarios/vision_language/image2structure/__init__.py +0 -0
  196. helm/benchmark/scenarios/vision_language/image2structure/chart2csv_scenario.py +55 -0
  197. helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +214 -0
  198. helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +25 -0
  199. helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +20 -0
  200. helm/benchmark/scenarios/vision_language/image2structure/utils_latex.py +347 -0
  201. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  202. helm/benchmark/scenarios/vision_language/image2structure/webpage/driver.py +84 -0
  203. helm/benchmark/scenarios/vision_language/image2structure/webpage/jekyll_server.py +182 -0
  204. helm/benchmark/scenarios/vision_language/image2structure/webpage/utils.py +31 -0
  205. helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +225 -0
  206. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
  207. helm/benchmark/scenarios/vision_language/mementos_scenario.py +124 -0
  208. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
  209. helm/benchmark/scenarios/vision_language/mme_scenario.py +145 -0
  210. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +187 -0
  211. helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
  212. helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
  213. helm/benchmark/scenarios/vision_language/multipanelvqa_scenario.py +169 -0
  214. helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
  215. helm/benchmark/scenarios/vision_language/pairs_scenario.py +246 -0
  216. helm/benchmark/scenarios/vision_language/pope_scenario.py +104 -0
  217. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +129 -0
  218. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +108 -0
  219. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +3 -4
  220. helm/benchmark/scenarios/vision_language/vqa_scenario.py +5 -3
  221. helm/benchmark/scenarios/wmt_14_scenario.py +1 -1
  222. helm/benchmark/server.py +24 -1
  223. helm/benchmark/slurm_runner.py +70 -49
  224. helm/benchmark/static/benchmarking.js +1 -1
  225. helm/benchmark/static/schema_classic.yaml +258 -1066
  226. helm/benchmark/static/schema_image2structure.yaml +304 -0
  227. helm/benchmark/static/schema_instruction_following.yaml +210 -0
  228. helm/benchmark/static/schema_lite.yaml +2 -227
  229. helm/benchmark/static/schema_mmlu.yaml +1507 -0
  230. helm/benchmark/static/schema_unitxt.yaml +428 -0
  231. helm/benchmark/static/schema_vhelm_lite.yaml +164 -0
  232. helm/benchmark/static/schema_vlm.yaml +823 -0
  233. helm/benchmark/static_build/assets/01-694cb9b7.png +0 -0
  234. helm/benchmark/static_build/assets/ai21-0eb91ec3.png +0 -0
  235. helm/benchmark/static_build/assets/aleph-alpha-7ce10034.png +0 -0
  236. helm/benchmark/static_build/assets/anthropic-70d8bc39.png +0 -0
  237. helm/benchmark/static_build/assets/bigscience-7f0400c0.png +0 -0
  238. helm/benchmark/static_build/assets/cohere-3550c6cb.png +0 -0
  239. helm/benchmark/static_build/assets/crfm-logo-74391ab8.png +0 -0
  240. helm/benchmark/static_build/assets/eleutherai-b9451114.png +0 -0
  241. helm/benchmark/static_build/assets/google-06d997ad.png +0 -0
  242. helm/benchmark/static_build/assets/heim-logo-3e5e3aa4.png +0 -0
  243. helm/benchmark/static_build/assets/helm-logo-simple-2ed5400b.png +0 -0
  244. helm/benchmark/static_build/assets/helmhero-28e90f4d.png +0 -0
  245. helm/benchmark/static_build/assets/index-737eef9e.js +10 -0
  246. helm/benchmark/static_build/assets/index-878a1094.css +1 -0
  247. helm/benchmark/static_build/assets/meta-5580e9f1.png +0 -0
  248. helm/benchmark/static_build/assets/microsoft-f5ee5016.png +0 -0
  249. helm/benchmark/static_build/assets/mistral-18e1be23.png +0 -0
  250. helm/benchmark/static_build/assets/nvidia-86fa75c1.png +0 -0
  251. helm/benchmark/static_build/assets/openai-3f8653e4.png +0 -0
  252. helm/benchmark/static_build/assets/react-d4a0b69b.js +85 -0
  253. helm/benchmark/static_build/assets/recharts-6d337683.js +97 -0
  254. helm/benchmark/static_build/assets/tii-24de195c.png +0 -0
  255. helm/benchmark/static_build/assets/together-a665a35b.png +0 -0
  256. helm/benchmark/static_build/assets/tremor-54a99cc4.js +10 -0
  257. helm/benchmark/static_build/assets/tsinghua-keg-97d4b395.png +0 -0
  258. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  259. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  260. helm/benchmark/static_build/assets/yandex-38e09d70.png +0 -0
  261. helm/benchmark/static_build/config.js +4 -0
  262. helm/benchmark/static_build/index.html +20 -0
  263. helm/benchmark/test_data_preprocessor.py +3 -3
  264. helm/benchmark/test_run_expander.py +1 -1
  265. helm/benchmark/window_services/ai21_window_service.py +22 -33
  266. helm/benchmark/window_services/cohere_window_service.py +1 -63
  267. helm/benchmark/window_services/default_window_service.py +2 -44
  268. helm/benchmark/window_services/encoder_decoder_window_service.py +0 -11
  269. helm/benchmark/window_services/ice_window_service.py +0 -34
  270. helm/benchmark/window_services/image_generation/__init__.py +0 -0
  271. helm/benchmark/window_services/image_generation/clip_window_service.py +15 -0
  272. helm/benchmark/window_services/image_generation/lexica_search_window_service.py +9 -0
  273. helm/benchmark/window_services/image_generation/openai_dalle_window_service.py +9 -0
  274. helm/benchmark/window_services/image_generation/test_clip_window_service.py +29 -0
  275. helm/benchmark/window_services/image_generation/test_openai_dalle_window_service.py +30 -0
  276. helm/benchmark/window_services/local_window_service.py +21 -4
  277. helm/benchmark/window_services/test_anthropic_window_service.py +2 -1
  278. helm/benchmark/window_services/test_bloom_window_service.py +2 -1
  279. helm/benchmark/window_services/test_cohere_window_service.py +2 -1
  280. helm/benchmark/window_services/test_flan_t5_window_service.py +2 -1
  281. helm/benchmark/window_services/test_gpt2_window_service.py +2 -2
  282. helm/benchmark/window_services/test_gpt4_window_service.py +2 -1
  283. helm/benchmark/window_services/test_gptj_window_service.py +3 -2
  284. helm/benchmark/window_services/test_gptneox_window_service.py +3 -2
  285. helm/benchmark/window_services/test_ice_window_service.py +2 -1
  286. helm/benchmark/window_services/test_openai_window_service.py +2 -1
  287. helm/benchmark/window_services/test_opt_window_service.py +3 -2
  288. helm/benchmark/window_services/test_palmyra_window_service.py +2 -1
  289. helm/benchmark/window_services/test_t0pp_window_service.py +2 -1
  290. helm/benchmark/window_services/test_t511b_window_service.py +2 -1
  291. helm/benchmark/window_services/test_ul2_window_service.py +2 -1
  292. helm/benchmark/window_services/test_utils.py +3 -2
  293. helm/benchmark/window_services/test_yalm_window_service.py +2 -1
  294. helm/benchmark/window_services/window_service.py +42 -0
  295. helm/benchmark/window_services/window_service_factory.py +4 -1
  296. helm/benchmark/window_services/yalm_window_service.py +0 -27
  297. helm/clients/__init__.py +0 -0
  298. helm/{proxy/clients → clients}/ai21_client.py +3 -9
  299. helm/clients/aleph_alpha_client.py +112 -0
  300. helm/{proxy/clients → clients}/anthropic_client.py +233 -18
  301. helm/{proxy/clients → clients}/auto_client.py +59 -31
  302. helm/clients/bedrock_client.py +128 -0
  303. helm/clients/bedrock_utils.py +72 -0
  304. helm/{proxy/clients → clients}/client.py +65 -7
  305. helm/clients/clip_score_client.py +49 -0
  306. helm/clients/clip_scorers/__init__.py +0 -0
  307. helm/clients/clip_scorers/base_clip_scorer.py +18 -0
  308. helm/clients/clip_scorers/clip_scorer.py +50 -0
  309. helm/clients/clip_scorers/multilingual_clip_scorer.py +50 -0
  310. helm/{proxy/clients → clients}/cohere_client.py +4 -11
  311. helm/clients/gcs_client.py +82 -0
  312. helm/{proxy/clients → clients}/google_client.py +5 -5
  313. helm/clients/google_translate_client.py +35 -0
  314. helm/{proxy/clients → clients}/http_model_client.py +5 -7
  315. helm/{proxy/clients → clients}/huggingface_client.py +43 -64
  316. helm/clients/image_generation/__init__.py +0 -0
  317. helm/clients/image_generation/adobe_vision_client.py +78 -0
  318. helm/clients/image_generation/aleph_alpha_image_generation_client.py +98 -0
  319. helm/clients/image_generation/cogview2/__init__.py +0 -0
  320. helm/clients/image_generation/cogview2/coglm_strategy.py +96 -0
  321. helm/clients/image_generation/cogview2/coglm_utils.py +82 -0
  322. helm/clients/image_generation/cogview2/sr_pipeline/__init__.py +15 -0
  323. helm/clients/image_generation/cogview2/sr_pipeline/direct_sr.py +96 -0
  324. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +254 -0
  325. helm/clients/image_generation/cogview2/sr_pipeline/dsr_sampling.py +190 -0
  326. helm/clients/image_generation/cogview2/sr_pipeline/iterative_sr.py +141 -0
  327. helm/clients/image_generation/cogview2/sr_pipeline/itersr_model.py +269 -0
  328. helm/clients/image_generation/cogview2/sr_pipeline/itersr_sampling.py +120 -0
  329. helm/clients/image_generation/cogview2/sr_pipeline/sr_group.py +42 -0
  330. helm/clients/image_generation/cogview2_client.py +191 -0
  331. helm/clients/image_generation/dalle2_client.py +192 -0
  332. helm/clients/image_generation/dalle3_client.py +108 -0
  333. helm/clients/image_generation/dalle_mini/__init__.py +3 -0
  334. helm/clients/image_generation/dalle_mini/data.py +442 -0
  335. helm/clients/image_generation/dalle_mini/model/__init__.py +5 -0
  336. helm/clients/image_generation/dalle_mini/model/configuration.py +175 -0
  337. helm/clients/image_generation/dalle_mini/model/modeling.py +1834 -0
  338. helm/clients/image_generation/dalle_mini/model/partitions.py +84 -0
  339. helm/clients/image_generation/dalle_mini/model/processor.py +63 -0
  340. helm/clients/image_generation/dalle_mini/model/text.py +251 -0
  341. helm/clients/image_generation/dalle_mini/model/tokenizer.py +9 -0
  342. helm/clients/image_generation/dalle_mini/model/utils.py +29 -0
  343. helm/clients/image_generation/dalle_mini/vqgan_jax/__init__.py +1 -0
  344. helm/clients/image_generation/dalle_mini/vqgan_jax/configuration_vqgan.py +40 -0
  345. helm/clients/image_generation/dalle_mini/vqgan_jax/convert_pt_model_to_jax.py +107 -0
  346. helm/clients/image_generation/dalle_mini/vqgan_jax/modeling_flax_vqgan.py +610 -0
  347. helm/clients/image_generation/dalle_mini_client.py +190 -0
  348. helm/clients/image_generation/deep_floyd_client.py +78 -0
  349. helm/clients/image_generation/huggingface_diffusers_client.py +249 -0
  350. helm/clients/image_generation/image_generation_client_utils.py +9 -0
  351. helm/clients/image_generation/lexica_client.py +86 -0
  352. helm/clients/image_generation/mindalle/__init__.py +0 -0
  353. helm/clients/image_generation/mindalle/models/__init__.py +216 -0
  354. helm/clients/image_generation/mindalle/models/stage1/__init__.py +0 -0
  355. helm/clients/image_generation/mindalle/models/stage1/layers.py +312 -0
  356. helm/clients/image_generation/mindalle/models/stage1/vqgan.py +103 -0
  357. helm/clients/image_generation/mindalle/models/stage2/__init__.py +0 -0
  358. helm/clients/image_generation/mindalle/models/stage2/layers.py +144 -0
  359. helm/clients/image_generation/mindalle/models/stage2/transformer.py +268 -0
  360. helm/clients/image_generation/mindalle/models/tokenizer.py +30 -0
  361. helm/clients/image_generation/mindalle/utils/__init__.py +3 -0
  362. helm/clients/image_generation/mindalle/utils/config.py +129 -0
  363. helm/clients/image_generation/mindalle/utils/sampling.py +149 -0
  364. helm/clients/image_generation/mindalle/utils/utils.py +89 -0
  365. helm/clients/image_generation/mindalle_client.py +115 -0
  366. helm/clients/image_generation/nudity_check_client.py +64 -0
  367. helm/clients/image_generation/together_image_generation_client.py +111 -0
  368. helm/{proxy/clients → clients}/lit_gpt_client.py +4 -4
  369. helm/{proxy/clients → clients}/megatron_client.py +5 -5
  370. helm/clients/mistral_client.py +134 -0
  371. helm/clients/moderation_api_client.py +109 -0
  372. helm/clients/open_lm_client.py +43 -0
  373. helm/clients/openai_client.py +301 -0
  374. helm/{proxy/clients → clients}/palmyra_client.py +6 -8
  375. helm/{proxy/clients → clients}/perspective_api_client.py +7 -8
  376. helm/clients/simple_client.py +64 -0
  377. helm/{proxy/clients → clients}/test_auto_client.py +13 -15
  378. helm/clients/test_client.py +100 -0
  379. helm/{proxy/clients → clients}/test_huggingface_client.py +15 -16
  380. helm/clients/test_simple_client.py +19 -0
  381. helm/{proxy/clients → clients}/test_together_client.py +20 -8
  382. helm/{proxy/clients → clients}/together_client.py +104 -73
  383. helm/clients/vertexai_client.py +400 -0
  384. helm/clients/vision_language/__init__.py +0 -0
  385. helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
  386. helm/clients/vision_language/huggingface_vlm_client.py +111 -0
  387. helm/{proxy/clients → clients}/vision_language/idefics_client.py +54 -49
  388. helm/clients/vision_language/open_flamingo/__init__.py +2 -0
  389. helm/clients/vision_language/open_flamingo/src/__init__.py +0 -0
  390. helm/clients/vision_language/open_flamingo/src/factory.py +147 -0
  391. helm/clients/vision_language/open_flamingo/src/flamingo.py +337 -0
  392. helm/clients/vision_language/open_flamingo/src/flamingo_lm.py +155 -0
  393. helm/clients/vision_language/open_flamingo/src/helpers.py +267 -0
  394. helm/clients/vision_language/open_flamingo/src/utils.py +47 -0
  395. helm/clients/vision_language/open_flamingo_client.py +155 -0
  396. helm/clients/vision_language/qwen_vlm_client.py +171 -0
  397. helm/clients/vllm_client.py +46 -0
  398. helm/common/cache.py +16 -4
  399. helm/common/cache_backend_config.py +47 -0
  400. helm/common/clip_score_request.py +41 -0
  401. helm/common/file_caches/__init__.py +0 -0
  402. helm/common/file_caches/file_cache.py +16 -0
  403. helm/common/file_caches/local_file_cache.py +61 -0
  404. helm/common/file_caches/test_local_file_cache.py +25 -0
  405. helm/common/file_upload_request.py +27 -0
  406. helm/common/general.py +1 -1
  407. helm/common/image_generation_parameters.py +25 -0
  408. helm/common/images_utils.py +33 -3
  409. helm/common/key_value_store.py +35 -4
  410. helm/common/media_object.py +13 -0
  411. helm/common/moderations_api_request.py +71 -0
  412. helm/common/mongo_key_value_store.py +3 -3
  413. helm/common/multimodal_request_utils.py +31 -0
  414. helm/common/nudity_check_request.py +29 -0
  415. helm/common/request.py +15 -17
  416. helm/common/test_general.py +6 -0
  417. helm/common/tokenization_request.py +1 -1
  418. helm/config/model_deployments.yaml +1159 -538
  419. helm/config/model_metadata.yaml +868 -41
  420. helm/config/tokenizer_configs.yaml +149 -43
  421. helm/proxy/accounts.py +31 -4
  422. helm/proxy/critique/mechanical_turk_critique_importer.py +3 -0
  423. helm/proxy/critique/model_critique_client.py +8 -6
  424. helm/proxy/example_queries.py +29 -17
  425. helm/proxy/server.py +70 -5
  426. helm/proxy/services/remote_service.py +31 -0
  427. helm/proxy/services/server_service.py +96 -16
  428. helm/proxy/services/service.py +30 -0
  429. helm/proxy/services/test_remote_service.py +4 -3
  430. helm/proxy/services/test_service.py +0 -12
  431. helm/proxy/test_accounts.py +32 -0
  432. helm/proxy/token_counters/auto_token_counter.py +37 -37
  433. helm/proxy/token_counters/test_auto_token_counter.py +164 -0
  434. helm/proxy/token_counters/token_counter.py +3 -5
  435. helm/tokenizers/__init__.py +0 -0
  436. helm/{proxy/tokenizers → tokenizers}/ai21_tokenizer.py +3 -3
  437. helm/{proxy/tokenizers → tokenizers}/anthropic_tokenizer.py +1 -1
  438. helm/{proxy/tokenizers → tokenizers}/auto_tokenizer.py +6 -9
  439. helm/{proxy/tokenizers → tokenizers}/cohere_tokenizer.py +1 -1
  440. helm/{proxy/tokenizers → tokenizers}/http_model_tokenizer.py +3 -3
  441. helm/{proxy/tokenizers → tokenizers}/huggingface_tokenizer.py +7 -26
  442. helm/tokenizers/simple_tokenizer.py +33 -0
  443. helm/{proxy/tokenizers → tokenizers}/test_anthropic_tokenizer.py +1 -1
  444. helm/{proxy/tokenizers → tokenizers}/test_huggingface_tokenizer.py +3 -0
  445. helm/tokenizers/test_simple_tokenizer.py +33 -0
  446. helm/{proxy/tokenizers → tokenizers}/vertexai_tokenizer.py +1 -1
  447. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer.py +5 -3
  448. helm/tokenizers/yalm_tokenizer_data/__init__.py +0 -0
  449. helm/tokenizers/yalm_tokenizer_data/voc_100b.sp +0 -0
  450. helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/yalm_tokenizer.py +1 -1
  451. crfm_helm-0.4.0.dist-info/RECORD +0 -397
  452. helm/benchmark/run_specs.py +0 -2762
  453. helm/benchmark/test_model_deployment_definition.py +0 -92
  454. helm/benchmark/test_model_properties.py +0 -1570
  455. helm/benchmark/vlm_run_specs.py +0 -97
  456. helm/benchmark/window_services/flan_t5_window_service.py +0 -29
  457. helm/benchmark/window_services/gpt2_window_service.py +0 -32
  458. helm/benchmark/window_services/huggingface_window_service.py +0 -60
  459. helm/benchmark/window_services/t0pp_window_service.py +0 -35
  460. helm/benchmark/window_services/t511b_window_service.py +0 -30
  461. helm/benchmark/window_services/test_mt_nlg_window_service.py +0 -48
  462. helm/benchmark/window_services/ul2_window_service.py +0 -30
  463. helm/benchmark/window_services/wider_ai21_window_service.py +0 -24
  464. helm/common/cache_utils.py +0 -14
  465. helm/proxy/clients/aleph_alpha_client.py +0 -95
  466. helm/proxy/clients/goose_ai_client.py +0 -99
  467. helm/proxy/clients/microsoft_client.py +0 -180
  468. helm/proxy/clients/openai_client.py +0 -206
  469. helm/proxy/clients/simple_client.py +0 -60
  470. helm/proxy/clients/test_client.py +0 -49
  471. helm/proxy/clients/vertexai_client.py +0 -115
  472. helm/proxy/token_counters/ai21_token_counter.py +0 -20
  473. helm/proxy/token_counters/cohere_token_counter.py +0 -13
  474. helm/proxy/token_counters/free_token_counter.py +0 -12
  475. helm/proxy/token_counters/gooseai_token_counter.py +0 -24
  476. helm/proxy/token_counters/openai_token_counter.py +0 -22
  477. helm/proxy/token_counters/test_ai21_token_counter.py +0 -88
  478. helm/proxy/token_counters/test_openai_token_counter.py +0 -81
  479. helm/proxy/tokenizers/simple_tokenizer.py +0 -32
  480. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/LICENSE +0 -0
  481. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/entry_points.txt +0 -0
  482. {crfm_helm-0.4.0.dist-info → crfm_helm-0.5.1.dist-info}/top_level.txt +0 -0
  483. /helm/{proxy/clients → benchmark/annotation}/__init__.py +0 -0
  484. /helm/{proxy/clients/vision_language → benchmark/annotation/image2structure}/__init__.py +0 -0
  485. /helm/{proxy/tokenizers → benchmark/metrics/image_generation}/__init__.py +0 -0
  486. /helm/{proxy/tokenizers/yalm_tokenizer_data → benchmark/metrics/image_generation/detectors}/__init__.py +0 -0
  487. /helm/{proxy/clients → clients}/ai21_utils.py +0 -0
  488. /helm/{proxy/clients → clients}/cohere_utils.py +0 -0
  489. /helm/{proxy/clients → clients}/lit_gpt_generate.py +0 -0
  490. /helm/{proxy/clients → clients}/toxicity_classifier_client.py +0 -0
  491. /helm/{proxy/tokenizers → tokenizers}/aleph_alpha_tokenizer.py +0 -0
  492. /helm/{proxy/tokenizers → tokenizers}/caching_tokenizer.py +0 -0
  493. /helm/{proxy/tokenizers → tokenizers}/ice_tokenizer.py +0 -0
  494. /helm/{proxy/tokenizers → tokenizers}/lit_gpt_tokenizer.py +0 -0
  495. /helm/{proxy/tokenizers → tokenizers}/test_ice_tokenizer.py +0 -0
  496. /helm/{proxy/tokenizers → tokenizers}/test_yalm_tokenizer.py +0 -0
  497. /helm/{proxy/tokenizers → tokenizers}/tiktoken_tokenizer.py +0 -0
  498. /helm/{proxy/tokenizers → tokenizers}/tokenizer.py +0 -0
  499. /helm/{proxy/tokenizers → tokenizers}/yalm_tokenizer_data/test_yalm_tokenizer.py +0 -0
@@ -0,0 +1,64 @@
1
+ import itertools
2
+ from typing import List, TypedDict
3
+ from typing import Dict, Any
4
+
5
+ from helm.common.cache import CacheConfig
6
+ from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
7
+ from helm.clients.client import CachingClient
8
+
9
+
10
# Typed payload handed to SimpleClient.invoke_model and used to build the cache key.
SimpleClientRequest = TypedDict(
    "SimpleClientRequest",
    {
        # Model engine name, taken from Request.model_engine.
        "engine": str,
        # Prompt text; its whitespace-split words seed the fabricated completions.
        "prompt": str,
        # How many completions to fabricate.
        "num_completions": int,
    },
)
14
+
15
+
16
class SimpleClient(CachingClient):
    """Deterministic stub client used in tutorials and for debugging.

    Instead of calling a real model API, it fabricates completions by
    echoing the prompt's words in reverse order (see ``invoke_model``).
    """

    def __init__(self, cache_config: CacheConfig):
        # No credentials or endpoint needed; only the cache is configured.
        super().__init__(cache_config=cache_config)

    def make_request(self, request: Request) -> RequestResult:
        """Serve ``request`` from the cache, fabricating a response on a miss."""
        raw_request: SimpleClientRequest = {
            "engine": request.model_engine,
            "prompt": request.prompt,
            "num_completions": request.num_completions,
        }

        def compute() -> Dict[str, Any]:
            return self.invoke_model(raw_request)

        cache_key = CachingClient.make_cache_key(raw_request, request)
        response, cached = self.cache.get(cache_key, wrap_request_time(compute))

        # Every fabricated completion (and its single token) gets a logprob of 0.
        completions: List[GeneratedOutput] = []
        for completion_text in response["completions"]:
            single_token = Token(text=completion_text, logprob=0)
            completions.append(
                GeneratedOutput(text=completion_text, logprob=0, tokens=[single_token])
            )

        return RequestResult(
            success=True,
            cached=cached,
            request_time=response["request_time"],
            request_datetime=response.get("request_datetime"),
            completions=completions,
            embedding=[],
        )

    def invoke_model(self, raw_request: SimpleClientRequest) -> Dict[str, Any]:
        """Fabricate completions from the prompt's words, last word first.

        Example:
            Prompt: 7 2 4 6
            Completions (num_completions = 3):
            - 6
            - 4
            - 2

        The reversed word list repeats cyclically if more completions are
        requested than there are words; an empty prompt yields no completions.
        """
        reversed_words: List[str] = raw_request["prompt"].split()[::-1]
        requested: int = raw_request["num_completions"]
        if not reversed_words:
            return {"completions": []}
        return {"completions": [reversed_words[i % len(reversed_words)] for i in range(requested)]}
@@ -1,12 +1,13 @@
1
1
  import dataclasses
2
2
  from tempfile import TemporaryDirectory
3
- from helm.common.request import Sequence, Token
3
+ from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
4
+ from helm.common.request import GeneratedOutput, Token
4
5
 
5
6
  import pytest
6
7
 
7
8
  from helm.common.request import Request, RequestResult
8
9
  from helm.common.general import get_credentials
9
- from helm.proxy.clients.auto_client import AutoClient
10
+ from helm.clients.auto_client import AutoClient
10
11
 
11
12
 
12
13
  @pytest.mark.models
@@ -15,8 +16,8 @@ class TestAutoClient:
15
16
  credentials = get_credentials()
16
17
  if not credentials:
17
18
  pytest.skip("Skipping test because no credentials found")
18
- with TemporaryDirectory() as cache_path:
19
- auto_client = AutoClient(credentials, cache_path)
19
+ with TemporaryDirectory() as temp_dir_path:
20
+ auto_client = AutoClient(credentials, temp_dir_path, BlackHoleCacheBackendConfig())
20
21
  actual_result = auto_client.make_request(request)
21
22
  assert actual_result.request_time or actual_result.batch_request_time
22
23
  actual_result = dataclasses.replace(
@@ -36,32 +37,29 @@ class TestAutoClient:
36
37
  success=True,
37
38
  embedding=[],
38
39
  completions=[
39
- Sequence(
40
+ GeneratedOutput(
40
41
  text=" intelligent species on the planet. They are also one",
41
42
  logprob=-9.087313510477543,
42
43
  tokens=[
43
44
  Token(
44
45
  text="Ġintelligent",
45
46
  logprob=-1.9816237688064575,
46
- top_logprobs={"Ġintelligent": -1.9816237688064575},
47
47
  ),
48
48
  Token(
49
49
  text="Ġspecies",
50
50
  logprob=-1.2881066799163818,
51
- top_logprobs={"Ġspecies": -1.2881066799163818},
52
51
  ),
53
- Token(text="Ġon", logprob=-0.16092979907989502, top_logprobs={"Ġon": -0.16092979907989502}),
54
- Token(text="Ġthe", logprob=-0.23620447516441345, top_logprobs={"Ġthe": -0.23620447516441345}),
52
+ Token(text="Ġon", logprob=-0.16092979907989502),
53
+ Token(text="Ġthe", logprob=-0.23620447516441345),
55
54
  Token(
56
55
  text="Ġplanet",
57
56
  logprob=-0.015416033565998077,
58
- top_logprobs={"Ġplanet": -0.015416033565998077},
59
57
  ),
60
- Token(text=".", logprob=-0.6683081388473511, top_logprobs={".": -0.6683081388473511}),
61
- Token(text="ĠThey", logprob=-1.9231040477752686, top_logprobs={"ĠThey": -1.9231040477752686}),
62
- Token(text="Ġare", logprob=-0.9322243332862854, top_logprobs={"Ġare": -0.9322243332862854}),
63
- Token(text="Ġalso", logprob=-0.7750787138938904, top_logprobs={"Ġalso": -0.7750787138938904}),
64
- Token(text="Ġone", logprob=-1.1063175201416016, top_logprobs={"Ġone": -1.1063175201416016}),
58
+ Token(text=".", logprob=-0.6683081388473511),
59
+ Token(text="ĠThey", logprob=-1.9231040477752686),
60
+ Token(text="Ġare", logprob=-0.9322243332862854),
61
+ Token(text="Ġalso", logprob=-0.7750787138938904),
62
+ Token(text="Ġone", logprob=-1.1063175201416016),
65
63
  ],
66
64
  finish_reason={"reason": "length"},
67
65
  )
@@ -0,0 +1,100 @@
1
+ from helm.common.cache import BlackHoleCacheConfig
2
+ from helm.tokenizers.huggingface_tokenizer import HuggingFaceTokenizer
3
+ from .client import truncate_sequence, truncate_and_tokenize_response_text
4
+ from typing import List
5
+ from helm.common.request import Request, GeneratedOutput, Token
6
+
7
+
8
def truncate_sequence_helper(tokens: List[str], request: Request, expected_tokens: List[str]):
    """Build a synthetic GeneratedOutput from ``tokens``, truncate it, and check the result.

    Each input token gets logprob -1, so the sequence logprob is -len(tokens).
    Asserts that the truncated output matches ``expected_tokens`` in token
    texts, joined text, and accumulated logprob.
    """
    token_objects = [Token(text=piece, logprob=-1) for piece in tokens]
    original = GeneratedOutput(
        text="".join(tokens),
        tokens=token_objects,
        logprob=-len(tokens),
    )

    truncated = truncate_sequence(original, request)

    actual_texts = [token.text for token in truncated.tokens]
    assert expected_tokens == actual_texts
    assert "".join(expected_tokens) == truncated.text
    assert truncated.logprob == sum(token.logprob for token in truncated.tokens)
20
+
21
+
22
def test_truncate_sequence():
    """truncate_sequence honors echo_prompt, stop_sequences, and max_tokens."""
    davinci = "openai/text-davinci-002"

    # echo_prompt = True: nothing gets truncated.
    truncate_sequence_helper(
        ["a", "b", "c"],
        Request(model=davinci, model_deployment=davinci, prompt="abc", echo_prompt=True),
        ["a", "b", "c"],
    )

    # Stop sequence never appears in the output: nothing gets truncated.
    truncate_sequence_helper(
        ["hello", " world"],
        Request(model=davinci, model_deployment=davinci, stop_sequences=["#"]),
        ["hello", " world"],
    )

    # Truncate at the stop sequence.
    truncate_sequence_helper(
        ["hello", " world", "\n", "what"],
        Request(model=davinci, model_deployment=davinci, stop_sequences=["\n"]),
        ["hello", " world"],
    )

    # Truncate down to max_tokens tokens.
    truncate_sequence_helper(
        ["a", "b", "c"],
        Request(model=davinci, model_deployment=davinci, max_tokens=2),
        ["a", "b"],
    )
52
+
53
+
54
def test_truncate_and_tokenize_response_text():
    """Truncation by stop sequences / max_tokens is applied and the finish reason reported."""
    tokenizer = HuggingFaceTokenizer(BlackHoleCacheConfig())
    tokenizer_name = "huggingface/gpt2"
    full_text = "I am a scientist. I am a scientist."

    def tokens_for(texts):
        # The expected tokens all carry logprob 0.0.
        return [Token(text, 0.0) for text in texts]

    # No truncation: the whole text survives and the reason is "endoftext".
    response = truncate_and_tokenize_response_text(
        full_text, Request(max_tokens=100, stop_sequences=[]), tokenizer, tokenizer_name
    )
    assert response.finish_reason
    assert response.finish_reason["reason"] == "endoftext"
    assert response.text == full_text
    assert response.tokens == tokens_for(
        ["I", " am", " a", " scientist", ".", " I", " am", " a", " scientist", "."]
    )

    # Truncated at the first "." with max_tokens to spare: reason is "stop".
    response = truncate_and_tokenize_response_text(
        full_text, Request(max_tokens=7, stop_sequences=["."]), tokenizer, tokenizer_name
    )
    assert response.finish_reason
    assert response.finish_reason["reason"] == "stop"
    assert response.text == "I am a scientist"
    assert response.tokens == tokens_for(["I", " am", " a", " scientist"])

    # Truncated by max_tokens with no stop sequences: reason is "length".
    response = truncate_and_tokenize_response_text(
        full_text, Request(max_tokens=3, stop_sequences=[]), tokenizer, tokenizer_name
    )
    assert response.finish_reason
    assert response.finish_reason["reason"] == "length"
    assert response.text == "I am a"
    assert response.tokens == tokens_for(["I", " am", " a"])

    # max_tokens wins when it truncates before the stop sequence is reached.
    response = truncate_and_tokenize_response_text(
        full_text, Request(max_tokens=3, stop_sequences=["."]), tokenizer, tokenizer_name
    )
    assert response.finish_reason
    assert response.finish_reason["reason"] == "length"
    assert response.text == "I am a"
    assert response.tokens == tokens_for(["I", " am", " a"])
@@ -1,31 +1,24 @@
1
- import os
2
1
  import pytest
3
- import tempfile
4
2
 
5
- from helm.common.cache import SqliteCacheConfig
3
+ from helm.common.cache import BlackHoleCacheConfig
6
4
  from helm.common.request import Request, RequestResult
7
- from .huggingface_client import HuggingFaceClient
5
+ from helm.clients.huggingface_client import HuggingFaceClient
8
6
 
9
7
 
10
8
  class TestHuggingFaceClient:
11
- def setup_method(self, method):
12
- cache_file = tempfile.NamedTemporaryFile(delete=False)
13
- self.cache_path: str = cache_file.name
14
- self.client = HuggingFaceClient(cache_config=SqliteCacheConfig(self.cache_path))
15
-
16
- def teardown_method(self, method):
17
- os.remove(self.cache_path)
18
-
19
9
  def test_gpt2(self):
10
+ client = HuggingFaceClient(
11
+ cache_config=BlackHoleCacheConfig(), pretrained_model_name_or_path="openai-community/gpt2"
12
+ )
20
13
  prompt: str = "I am a computer scientist."
21
- result: RequestResult = self.client.make_request(
14
+ result: RequestResult = client.make_request(
22
15
  Request(
23
16
  model="openai/gpt2",
24
17
  model_deployment="huggingface/gpt2",
25
18
  prompt=prompt,
26
19
  num_completions=3,
27
20
  top_k_per_token=5,
28
- max_tokens=0,
21
+ max_tokens=1,
29
22
  echo_prompt=True,
30
23
  )
31
24
  )
@@ -36,7 +29,10 @@ class TestHuggingFaceClient:
36
29
 
37
30
  @pytest.mark.skip(reason="GPT-J 6B is 22 GB and extremely slow without a GPU.")
38
31
  def test_gptj_6b(self):
39
- result: RequestResult = self.client.make_request(
32
+ client = HuggingFaceClient(
33
+ cache_config=BlackHoleCacheConfig(), pretrained_model_name_or_path="openai-community/gpt2"
34
+ )
35
+ result: RequestResult = client.make_request(
40
36
  Request(
41
37
  model="eleutherai/gpt-j-6b",
42
38
  model_deployment="huggingface/gpt-j-6b",
@@ -49,8 +45,11 @@ class TestHuggingFaceClient:
49
45
  assert len(result.completions) == 3
50
46
 
51
47
  def test_logprob(self):
48
+ client = HuggingFaceClient(
49
+ cache_config=BlackHoleCacheConfig(), pretrained_model_name_or_path="openai-community/gpt2"
50
+ )
52
51
  prompt: str = "I am a computer scientist."
53
- result: RequestResult = self.client.make_request(
52
+ result: RequestResult = client.make_request(
54
53
  Request(
55
54
  model="openai/gpt2",
56
55
  model_deployment="huggingface/gpt2",
@@ -0,0 +1,19 @@
1
+ from helm.clients.simple_client import SimpleClient
2
+ from helm.common.cache import BlackHoleCacheConfig
3
+ from helm.common.request import GeneratedOutput, Request, Token
4
+
5
+
6
def test_simple_client_make_request():
    """SimpleClient fabricates a completion from the prompt's last word."""
    client = SimpleClient(BlackHoleCacheConfig())
    request = Request(
        model="simple/model1",
        model_deployment="simple/model1",
        prompt="Elephants are one of the most",
        temperature=0.0,
        max_tokens=10,
    )

    result = client.make_request(request)

    assert result.success
    # NOTE: BlackHoleCacheConfig presumably never persists entries, so this is a cache miss.
    assert not result.cached
    assert result.embedding == []
    expected_token = Token(text="most", logprob=0)
    assert result.completions == [GeneratedOutput(text="most", logprob=0, tokens=[expected_token])]
@@ -12,15 +12,15 @@ class TestTogetherClient:
12
12
  def setup_method(self, method):
13
13
  cache_file = tempfile.NamedTemporaryFile(delete=False)
14
14
  self.cache_path: str = cache_file.name
15
- self.client = TogetherClient(cache_config=SqliteCacheConfig(self.cache_path))
16
15
 
17
16
  def teardown_method(self, method):
18
17
  os.remove(self.cache_path)
19
18
 
20
19
  @pytest.mark.parametrize(
21
- "test_input,expected",
20
+ "together_model,test_input,expected",
22
21
  [
23
22
  (
23
+ "togethercomputer/RedPajama-INCITE-Base-3B-v1",
24
24
  Request(
25
25
  model="together/redpajama-incite-base-3b-v1",
26
26
  model_deployment="together/redpajama-incite-base-3b-v1",
@@ -28,7 +28,6 @@ class TestTogetherClient:
28
28
  {
29
29
  "best_of": 1,
30
30
  "echo": False,
31
- "logprobs": 1,
32
31
  "max_tokens": 100,
33
32
  "model": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
34
33
  "n": 1,
@@ -40,6 +39,7 @@ class TestTogetherClient:
40
39
  },
41
40
  ),
42
41
  (
42
+ "huggyllama/llama-7b",
43
43
  Request(
44
44
  model="meta/llama-7b",
45
45
  model_deployment="together/llama-7b",
@@ -55,7 +55,6 @@ class TestTogetherClient:
55
55
  {
56
56
  "best_of": 3,
57
57
  "echo": True,
58
- "logprobs": 3,
59
58
  "max_tokens": 24,
60
59
  "model": "huggyllama/llama-7b",
61
60
  "n": 4,
@@ -67,6 +66,7 @@ class TestTogetherClient:
67
66
  },
68
67
  ),
69
68
  (
69
+ "togethercomputer/alpaca-7b",
70
70
  Request(
71
71
  model="stanford/alpaca-7b",
72
72
  model_deployment="together/alpaca-7b",
@@ -75,7 +75,6 @@ class TestTogetherClient:
75
75
  {
76
76
  "best_of": 1,
77
77
  "echo": False,
78
- "logprobs": 1,
79
78
  "max_tokens": 100,
80
79
  "model": "togethercomputer/alpaca-7b",
81
80
  "n": 1,
@@ -89,9 +88,22 @@ class TestTogetherClient:
89
88
  # TODO(#1828): Add test for `SET_DETAILS_TO_TRUE` after Together supports it.
90
89
  ],
91
90
  )
92
- def test_convert_to_raw_request(self, test_input, expected):
93
- assert expected == TogetherClient.convert_to_raw_request(test_input)
91
+ def test_convert_to_raw_request(self, together_model, test_input, expected):
92
+ client = TogetherClient(
93
+ cache_config=SqliteCacheConfig(self.cache_path),
94
+ together_model=together_model,
95
+ )
96
+ assert expected == client.convert_to_raw_request(test_input)
94
97
 
95
98
  def test_api_key_error(self):
99
+ client = TogetherClient(
100
+ cache_config=SqliteCacheConfig(self.cache_path),
101
+ together_model="togethercomputer/RedPajama-INCITE-Base-3B-v1",
102
+ )
96
103
  with pytest.raises(TogetherClientError):
97
- self.client.make_request(Request(model="bigscience/bloom", model_deployment="together/bloom"))
104
+ client.make_request(
105
+ Request(
106
+ model="together/redpajama-incite-base-3b-v1",
107
+ model_deployment="together/redpajama-incite-base-3b-v1",
108
+ )
109
+ )
@@ -1,69 +1,20 @@
1
1
  from copy import deepcopy
2
- from typing import List, Dict, Any, Optional, Union
2
+ from itertools import zip_longest
3
+ from typing import List, Dict, Any, Optional, TypedDict, Union
3
4
 
4
5
  import requests
5
6
  from retrying import retry
6
7
 
7
8
  from helm.common.cache import CacheConfig
8
- from helm.common.request import wrap_request_time, Request, RequestResult, Sequence, Token
9
- from .client import CachingClient, truncate_sequence, cleanup_str
10
-
11
-
12
- MODEL_ALIASES: Dict[str, str] = {
13
- # Legacy models
14
- "flan-t5-xxl": "flan-t5-xxl-hf",
15
- "h3-2.7b": "h3-2.7b-h3",
16
- "opt-1.3b": "opt-1.3b-ft-tp1",
17
- "opt-6.7b": "opt-6.7b-ft-tp1",
18
- "mpt-7b": "togethercomputer/mpt-7b",
19
- "mpt-instruct-7b": "togethercomputer/mpt-7b-instruct",
20
- "stablelm-base-alpha-3b": "stabilityai/stablelm-base-alpha-3b",
21
- "stablelm-base-alpha-7b": "stabilityai/stablelm-base-alpha-7b",
22
- # Production models
23
- "redpajama-incite-base-3b-v1": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
24
- "redpajama-incite-instruct-3b-v1": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
25
- "redpajama-incite-base-7b": "togethercomputer/RedPajama-INCITE-7B-Base",
26
- "redpajama-incite-instruct-7b": "togethercomputer/RedPajama-INCITE-7B-Instruct",
27
- "alpaca-7b": "togethercomputer/alpaca-7b",
28
- "dolly-v2-3b": "databricks/dolly-v2-3b",
29
- "dolly-v2-7b": "databricks/dolly-v2-7b",
30
- "dolly-v2-12b": "databricks/dolly-v2-12b",
31
- "falcon-7b": "togethercomputer/falcon-7b",
32
- "falcon-7b-instruct": "togethercomputer/falcon-7b-instruct",
33
- "falcon-40b": "togethercomputer/falcon-40b",
34
- "falcon-40b-instruct": "togethercomputer/falcon-40b-instruct",
35
- "gpt-jt-6b-v1": "togethercomputer/GPT-JT-6B-v1",
36
- "gpt-neoxt-chat-base-20b": "togethercomputer/GPT-NeoXT-Chat-Base-20B",
37
- "llama-7b": "huggyllama/llama-7b",
38
- "llama-13b": "huggyllama/llama-13b",
39
- "llama-30b": "huggyllama/llama-30b",
40
- "llama-65b": "huggyllama/llama-65b",
41
- "llama-2-7b": "togethercomputer/llama-2-7b",
42
- "llama-2-13b": "togethercomputer/llama-2-13b",
43
- "llama-2-70b": "togethercomputer/llama-2-70b",
44
- "mistral-7b-v0.1": "mistralai/Mistral-7B-v0.1",
45
- "mixtral-8x7b-32kseqlen": "mistralai/mixtral-8x7b-32kseqlen",
46
- "mpt-30b": "togethercomputer/mpt-30b",
47
- "mpt-instruct-30b": "togethercomputer/mpt-30b-instruct",
48
- "pythia-1b-v0": "EleutherAI/pythia-1b-v0",
49
- "pythia-2.8b-v0": "EleutherAI/pythia-2.8b-v0",
50
- "pythia-6.9b": "EleutherAI/pythia-6.9b",
51
- "pythia-12b-v0": "EleutherAI/pythia-12b-v0",
52
- "vicuna-7b-v1.3": "lmsys/vicuna-7b-v1.3",
53
- "vicuna-13b-v1.3": "lmsys/vicuna-13b-v1.3",
54
- "yi-6b": "zero-one-ai/Yi-6B",
55
- "yi-34b": "zero-one-ai/Yi-34B",
56
- }
57
- """Together model name aliases.
9
+ from helm.common.optional_dependencies import handle_module_not_found_error
10
+ from helm.common.request import wrap_request_time, Request, RequestResult, GeneratedOutput, Token
11
+ from helm.clients.client import CachingClient, truncate_sequence, cleanup_str
58
12
 
59
- HELM users use a shorter model name (e.g. together/flan-t5-xxl)
60
- whereas the Together client sends and caches requests using
61
- a longer model name that is suffixed with the implementation framework
62
- (e.g. flan-t5-xxl-hf). This allows tracking exactly which
63
- implementation was used in the cached results, since some results may
64
- be different depending on the implementation (e.g. efficiency metrics).
65
- This also allows future migration of results in the case of changes of
66
- available implementations on Together."""
13
+ try:
14
+ from together import Together
15
+ from together.types import ChatCompletionResponse
16
+ except ModuleNotFoundError as e:
17
+ handle_module_not_found_error(e, ["together"])
67
18
 
68
19
 
69
20
  class _RewriteRequestTags:
@@ -154,36 +105,35 @@ class TogetherClient(CachingClient):
154
105
  INFERENCE_ENDPOINT: str = "https://api.together.xyz/api/inference"
155
106
  RETRIEVE_JOB_MAX_WAIT_SECONDS: int = 60
156
107
 
157
- @staticmethod
158
- def convert_to_raw_request(request: Request) -> Dict:
108
+ def convert_to_raw_request(self, request: Request) -> Dict:
159
109
  # Following the examples from https://github.com/togethercomputer/open-models-api
160
110
  raw_request = {
161
111
  "request_type": "language-model-inference",
162
- "model": MODEL_ALIASES.get(request.model_engine, request.model_engine),
112
+ "model": self.together_model or request.model,
163
113
  "prompt": request.prompt,
164
114
  "temperature": request.temperature,
165
115
  "n": request.num_completions,
166
116
  "max_tokens": request.max_tokens,
167
117
  "best_of": request.top_k_per_token,
168
- "logprobs": request.top_k_per_token,
169
118
  "stop": request.stop_sequences or None,
170
119
  "echo": request.echo_prompt,
171
120
  "top_p": request.top_p,
172
121
  }
173
122
  return _rewrite_raw_request_for_model_tags(raw_request, request.model_engine)
174
123
 
175
- def __init__(self, cache_config: CacheConfig, api_key: Optional[str] = None):
124
+ def __init__(self, cache_config: CacheConfig, together_model: Optional[str] = None, api_key: Optional[str] = None):
176
125
  super().__init__(cache_config=cache_config)
177
126
  # TODO: the endpoint currently doesn't require an API key. When an API key is not specified
178
127
  # in credentials.conf, we rely on offline evaluation only.
179
128
  self.api_key: Optional[str] = api_key
129
+ self.together_model = together_model
180
130
 
181
131
  def _get_job_url(self, job_id: str) -> str:
182
132
  return f"https://api.together.xyz/jobs/job/{job_id}"
183
133
 
184
134
  def make_request(self, request: Request) -> RequestResult:
185
- raw_request = TogetherClient.convert_to_raw_request(request)
186
- cache_key: Dict = CachingClient.make_cache_key(raw_request, request)
135
+ raw_request = self.convert_to_raw_request(request)
136
+ cache_key = CachingClient.make_cache_key(raw_request, request)
187
137
 
188
138
  if not self.api_key:
189
139
  raise TogetherClientError("togetherApiKey not set in credentials.conf")
@@ -278,7 +228,7 @@ class TogetherClient(CachingClient):
278
228
  )
279
229
 
280
230
  # Expect the result to be structured the same way as a response from OpenAI API.
281
- completions: List[Sequence] = []
231
+ completions: List[GeneratedOutput] = []
282
232
  for raw_completion in response["choices"]:
283
233
  sequence_logprob = 0
284
234
  tokens: List[Token] = []
@@ -288,22 +238,20 @@ class TogetherClient(CachingClient):
288
238
  # Waiting for a fix.
289
239
  if "logprobs" in raw_completion:
290
240
  raw_data = raw_completion["logprobs"]
291
- for text, logprob, top_logprobs in zip(
292
- raw_data["tokens"], raw_data["token_logprobs"], raw_data["top_logprobs"]
293
- ):
241
+ for text, logprob in zip(raw_data["tokens"], raw_data["token_logprobs"]):
294
242
  # TODO #1654: Check if this is still needed
295
243
  text = cleanup_str(text, "together")
296
- tokens.append(Token(text=text, logprob=logprob or 0, top_logprobs=dict(top_logprobs or {})))
244
+ tokens.append(Token(text=text, logprob=logprob or 0))
297
245
  sequence_logprob += logprob or 0
298
246
  else:
299
247
  # hack: just make the entire text one token so that something shows up in the frontend
300
248
  text = cleanup_str(raw_completion["text"], "together")
301
- tokens.append(Token(text=text, logprob=0, top_logprobs={}))
249
+ tokens.append(Token(text=text, logprob=0))
302
250
 
303
251
  raw_finish_reason: Optional[str] = raw_completion.get("finish_reason")
304
252
  finish_reason: Optional[Dict] = {"reason": raw_finish_reason} if raw_finish_reason else None
305
253
 
306
- completion = Sequence(
254
+ completion = GeneratedOutput(
307
255
  text=cleanup_str(raw_completion["text"], "together"),
308
256
  logprob=sequence_logprob,
309
257
  tokens=tokens,
@@ -332,3 +280,86 @@ class TogetherClient(CachingClient):
332
280
  completions=completions,
333
281
  embedding=[],
334
282
  )
283
+
284
+
285
# Typed payload for the Together chat-completions endpoint,
# produced by convert_to_raw_chat_request.
TogetherRawChatRequest = TypedDict(
    "TogetherRawChatRequest",
    {
        "messages": List[Dict[str, str]],
        "model": str,
        "max_tokens": int,
        "stop": List[str],
        "temperature": float,
        "top_p": float,
        "top_k": int,
        "logprobs": int,
        "echo": bool,
        "n": int,
    },
)
296
+
297
+
298
def convert_to_raw_chat_request(request: "Request") -> "TogetherRawChatRequest":
    """Translate a HELM Request into kwargs for Together's chat-completions API.

    Uses ``request.messages`` verbatim when present; otherwise wraps the plain
    prompt as a single user message.
    """
    if request.messages:
        chat_messages = request.messages
    else:
        chat_messages = [{"role": "user", "content": request.prompt}]
    return {
        "messages": chat_messages,
        "model": request.model,
        "max_tokens": request.max_tokens,
        "stop": request.stop_sequences,
        "temperature": request.temperature,
        "top_p": request.top_p,
        "top_k": request.top_k_per_token,
        # Request at most one logprob per token (capped at 1 even when
        # top_k_per_token is larger).
        "logprobs": min(request.top_k_per_token, 1),
        "echo": request.echo_prompt,
        "n": request.num_completions,
    }
315
+
316
+
317
class TogetherChatClient(CachingClient):
    """Client that uses the Python Together library for chat models."""

    def __init__(self, cache_config: CacheConfig, api_key: str, together_model: Optional[str] = None):
        super().__init__(cache_config=cache_config)
        self._client = Together(api_key=api_key)
        # NOTE(review): together_model is accepted but currently unused; requests
        # are sent with request.model (see convert_to_raw_chat_request) — confirm intended.

    def make_request(self, request: Request) -> RequestResult:
        """Send a chat-completions request, caching by the raw request payload."""
        raw_request = convert_to_raw_chat_request(request)
        cache_key = CachingClient.make_cache_key(raw_request, request)

        def call_api() -> Dict[Any, Any]:
            # Serialize the SDK response to JSON-compatible data so it can be cached.
            sdk_response = self._client.chat.completions.create(**raw_request)
            return sdk_response.model_dump(mode="json")

        try:
            raw_response, cached = self.cache.get(cache_key, wrap_request_time(call_api))
            response = ChatCompletionResponse.model_validate(raw_response)
        except Exception as error:
            return RequestResult(
                success=False,
                cached=False,
                error=str(error),
                completions=[],
                embedding=[],
            )

        generated_outputs: List[GeneratedOutput] = []
        for choice in response.choices:
            # NOTE: Together always returns None for choice.finish_reason
            # NOTE: Together does not return logprobs for the whole generated output, only for individual tokens
            tokens: List[Token] = []
            if choice.logprobs:
                token_texts = choice.logprobs.tokens or []
                token_logprobs = choice.logprobs.token_logprobs or []
                for token_text, token_logprob in zip_longest(token_texts, token_logprobs):
                    if token_text is None:
                        # token_logprobs was longer than tokens; ignore the surplus.
                        break
                    tokens.append(Token(text=token_text, logprob=token_logprob or 0.0))
            assert choice.message.role == "assistant"
            generated_outputs.append(GeneratedOutput(text=choice.message.content, logprob=0.0, tokens=tokens))

        return RequestResult(
            success=True,
            cached=cached,
            request_time=raw_response["request_time"],
            request_datetime=raw_response["request_datetime"],
            completions=generated_outputs,
            embedding=[],
        )