PyPI - crfm-helm - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (546) hide show

helm/benchmark/window_services/test_mt_nlg_window_service.py DELETED Viewed

@@ -1,48 +0,0 @@
-import shutil
-import tempfile
-from .test_utils import get_tokenizer_service, TEST_PROMPT, GPT2_TEST_TOKENS, GPT2_TEST_TOKEN_IDS
-from .tokenizer_service import TokenizerService
-from .window_service_factory import WindowServiceFactory
-class TestMTNLGWindowService:
-    def setup_method(self):
-        self.path: str = tempfile.mkdtemp()
-        service: TokenizerService = get_tokenizer_service(self.path)
-        self.window_service = WindowServiceFactory.get_window_service("microsoft/TNLGv2_7B", service)
-    def teardown_method(self, method):
-        shutil.rmtree(self.path)
-    def test_max_request_length(self):
-        assert self.window_service.max_request_length == 2048
-    def test_encode(self):
-        assert self.window_service.encode(TEST_PROMPT).token_values == GPT2_TEST_TOKEN_IDS
-    def test_decode(self):
-        assert self.window_service.decode(self.window_service.encode(TEST_PROMPT).tokens) == TEST_PROMPT
-    def test_tokenize(self):
-        assert self.window_service.tokenize(TEST_PROMPT) == GPT2_TEST_TOKENS
-    def test_fits_within_context_window(self):
-        # Should fit in the context window since we subtracted the number of tokens of the test prompt
-        # from the max request length of 2048
-        assert self.window_service.fits_within_context_window(TEST_PROMPT, 2048 - 51)
-        # Should not fit within the max request length because we're expecting one more extra token in the completion
-        assert not self.window_service.fits_within_context_window(TEST_PROMPT, 2048 - 51 + 1)
-    def test_truncate_from_right(self):
-        # Create a prompt that exceed max context length: 51 * 41 = 2091 tokens
-        long_prompt: str = TEST_PROMPT * 41
-        assert not self.window_service.fits_within_context_window(long_prompt)
-        # Truncate and ensure it fits within the context window
-        truncated_long_prompt: str = self.window_service.truncate_from_right(long_prompt)
-        assert self.window_service.get_num_tokens(truncated_long_prompt) == 2048
-        assert self.window_service.fits_within_context_window(truncated_long_prompt)
-    def test_tokenize_and_count(self):
-        assert self.window_service.get_num_tokens(TEST_PROMPT) == 51

helm/benchmark/window_services/ul2_window_service.py DELETED Viewed

@@ -1,30 +0,0 @@
-from .encoder_decoder_window_service import EncoderDecoderWindowService
-from .tokenizer_service import TokenizerService
-class UL2WindowService(EncoderDecoderWindowService):
-    def __init__(self, service: TokenizerService):
-        super().__init__(service)
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length."""
-        # From https://arxiv.org/pdf/2205.05131.pdf, "the sequence length is set to 512/512 for inputs and targets".
-        # We subtract 1 to account for <extra_id_0> that gets appended to prompts.
-        return 512 - 1
-    @property
-    def end_of_text_token(self) -> str:
-        """The end of text token."""
-        return "</s>"
-    @property
-    def tokenizer_name(self) -> str:
-        """Name of the tokenizer to use when sending a request."""
-        return "google/ul2"
-    @property
-    def prefix_token(self) -> str:
-        """The prefix token is the same as the end of text token."""
-        # echo=True is not supported
-        return ""

helm/benchmark/window_services/wider_ai21_window_service.py DELETED Viewed

@@ -1,24 +0,0 @@
-from .ai21_window_service import AI21WindowService
-class WiderAI21WindowService(AI21WindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        Return the max sequence length of the larger AI21 Jurassic-2 models.
-        The AI21 server automatically prepends a token to every prompt,
-        so the actual max sequence length is 8192 - 1 = 8191.
-        """
-        return 8191
-class AI21Jurassic2JumboWindowService(AI21WindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """
-        Return the max sequence length of the AI21 Jurassic-2 Jumbo.
-        AI21 has recommended using a sequence length of 6000 tokens to avoid OOMs.
-        """
-        return 6000

helm/benchmark/window_services/wider_openai_window_service.py DELETED Viewed

@@ -1,52 +0,0 @@
-from .gpt2_window_service import GPT2WindowService
-class WiderOpenAIWindowService(GPT2WindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length of the larger second-generation OpenAI models.
-        Source: https://platform.openai.com/docs/models"""
-        return 4000
-class OpenAIChatWindowService(WiderOpenAIWindowService):
-    @property
-    def tokenizer_name(self) -> str:
-        return "openai/cl100k_base"
-class GPTTurboWindowService(OpenAIChatWindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length for GPT-3.5 Turbo.
-        Source: https://platform.openai.com/docs/models"""
-        return 4000
-class GPTTurbo16KWindowService(OpenAIChatWindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length for GPT-3.5 Turbo.
-        Source: https://platform.openai.com/docs/models"""
-        return 16000
-class GPT4WindowService(OpenAIChatWindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length for GPT-4.
-        Source: https://platform.openai.com/docs/models"""
-        return 8192
-class GPT432KWindowService(OpenAIChatWindowService):
-    @property
-    def max_sequence_length(self) -> int:
-        """Return the max sequence length for GPT-4.
-        Source: https://platform.openai.com/docs/models"""
-        return 32768

helm/proxy/clients/aleph_alpha_client.py DELETED Viewed

@@ -1,99 +0,0 @@
-import json
-import requests
-from typing import Any, Dict, List
-from aleph_alpha_client import Client as AlephAlphaPythonClient
-from helm.common.cache import CacheConfig
-from helm.common.request import wrap_request_time, Request, RequestResult, Sequence, Token
-from helm.proxy.tokenizers.tokenizer import Tokenizer
-from .client import CachingClient, truncate_sequence
-class AlephAlphaClient(CachingClient):
-    COMPLETION_ENDPOINT: str = "complete"
-    def __init__(self, api_key: str, tokenizer: Tokenizer, cache_config: CacheConfig):
-        super().__init__(cache_config=cache_config, tokenizer=tokenizer)
-        self.api_key: str = api_key
-        self._aleph_alpha_client = AlephAlphaPythonClient(token=api_key)
-    def _send_request(self, endpoint: str, raw_request: Dict[str, Any]) -> Dict[str, Any]:
-        response = requests.request(
-            method="POST",
-            url=f"https://api.aleph-alpha.com/{endpoint}",
-            headers={
-                "Content-Type": "application/json",
-                "Accept": "application/json",
-                "Authorization": f"Bearer {self.api_key}",
-            },
-            data=json.dumps(raw_request),
-            # Setting the nice flag prevents intensive benchmarking runs from saturating Aleph Alpha's API queues
-            params=json.dumps({"nice": True}),
-        )
-        result = json.loads(response.text)
-        assert "error" not in result, f"Request failed with error: {result['error']}"
-        return result
-    def make_request(self, request: Request) -> RequestResult:
-        """Make a request following https://docs.aleph-alpha.com/api/complete."""
-        raw_request = {
-            "model": request.model_engine,
-            "prompt": request.prompt,
-            "maximum_tokens": request.max_tokens,
-            "temperature": request.temperature,
-            "top_k": request.top_k_per_token,
-            "top_p": request.top_p,
-            "presence_penalty": request.presence_penalty,
-            "frequency_penalty": request.frequency_penalty,
-            "n": request.num_completions,
-            "stop_sequences": request.stop_sequences,
-            "log_probs": request.top_k_per_token,
-            "echo": request.echo_prompt,
-            "tokens": True,  # Setting to True returns individual tokens of the completion
-        }
-        try:
-            def do_it():
-                result = self._send_request(AlephAlphaClient.COMPLETION_ENDPOINT, raw_request)
-                assert "completions" in result, f"Invalid response: {result}"
-                return result
-            response, cached = self.cache.get(raw_request, wrap_request_time(do_it))
-        except (requests.exceptions.RequestException, AssertionError) as e:
-            error: str = f"AlephAlphaClient error: {e}"
-            return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
-        completions: List[Sequence] = []
-        for completion in response["completions"]:
-            sequence_logprob: float = 0
-            tokens: List[Token] = []
-            # `completion_tokens` is the list of selected tokens.
-            for i, token in enumerate(completion["completion_tokens"]):
-                # Get the top K logprobs for the ith token
-                top_logprobs: Dict[str, float] = completion["log_probs"][i]
-                # Use the selected token value to get the logprob
-                logprob: float = top_logprobs[token]
-                sequence_logprob += logprob
-                tokens.append(
-                    Token(
-                        text=token,
-                        logprob=logprob,
-                        top_logprobs=top_logprobs,
-                    )
-                )
-            sequence: Sequence = Sequence(text=completion["completion"], logprob=sequence_logprob, tokens=tokens)
-            sequence = truncate_sequence(sequence, request)
-            completions.append(sequence)
-        return RequestResult(
-            success=True,
-            cached=cached,
-            request_time=response["request_time"],
-            request_datetime=response["request_datetime"],
-            completions=completions,
-            embedding=[],
-        )

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl