PyPI - crfm-helm - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (546) hide show

{crfm_helm-0.3.0.dist-info → crfm_helm-0.5.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.3)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

helm/benchmark/adaptation/adapter_spec.py CHANGED Viewed

@@ -1,6 +1,26 @@
 from dataclasses import dataclass, field
 from typing import List, Optional
+from helm.common.image_generation_parameters import ImageGenerationParameters
+# Adaptation methods
+ADAPT_GENERATION: str = "generation"
+ADAPT_LANGUAGE_MODELING: str = "language_modeling"
+ADAPT_MULTIPLE_CHOICE_JOINT: str = "multiple_choice_joint"
+ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL: str = "multiple_choice_separate_original"
+ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED: str = "multiple_choice_separate_calibrated"
+ADAPT_RANKING_BINARY: str = "ranking_binary"
+ADAPT_MULTIPLE_CHOICE_SEPARATE_METHODS: List[str] = [
+    ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
+    ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
+]
+# Multimodal adaptation methods
+ADAPT_GENERATION_MULTIMODAL: str = "generation_multimodal"
+ADAPT_MULTIPLE_CHOICE_JOINT_MULTIMODAL: str = "multiple_choice_joint_multimodal"
 @dataclass(frozen=True)
 class Substitution:
@@ -26,6 +46,9 @@ class AdapterSpec:
     # For example, it is recommended to prefix all prompts with [NLG] for UL2.
     global_prefix: str = ""
+    # Append all prompts with this string.
+    global_suffix: str = ""
     # Prompt starts with instructions
     instructions: str = ""
@@ -68,12 +91,18 @@ class AdapterSpec:
     # set of training instances.  Used to compute error bars.
     num_train_trials: int = 1
+    # Number of trials, where we query the model with the same requests, but different random seeds
+    num_trials: int = 1
     # If true, randomly sample N training examples; if false, select N consecutive training examples
     sample_train: bool = True
     # Decoding parameters (inherited by `Request`)
-    # Model to make the request to (need to fill in)
+    # Model deployment to make the request to (need to fill in)
+    model_deployment: str = ""
+    # Model to make the request to
     model: str = ""
     # Temperature to use
@@ -89,5 +118,11 @@ class AdapterSpec:
     random: Optional[str] = None
     # If true, for instances with multiple correct reference, the gold answer should be considered
-    # to be all of the correct references rather than any of the correct references.
+    # to be all the correct references rather than any of the correct references.
     multi_label: bool = False
+    # Parameters for image generation
+    image_generation_parameters: Optional[ImageGenerationParameters] = None
+    # The splits from which evaluation instances will be drawn (set hash=False to make `AdapterSpec` hashable)
+    eval_splits: Optional[List[str]] = field(default=None, hash=False)

helm/benchmark/adaptation/adapters/adapter.py CHANGED Viewed

@@ -1,15 +1,12 @@
 from abc import ABC, abstractmethod
 from typing import List
-import numpy as np
 from helm.benchmark.adaptation.adapter_spec import AdapterSpec
-from helm.benchmark.adaptation.scenario_state import ScenarioState
-from helm.benchmark.scenarios.scenario import Instance, TRAIN_SPLIT, EVAL_SPLITS
+from helm.benchmark.adaptation.request_state import RequestState
+from helm.benchmark.scenarios.scenario import Instance
 from helm.benchmark.window_services.tokenizer_service import TokenizerService
 from helm.benchmark.window_services.window_service import WindowService
 from helm.benchmark.window_services.window_service_factory import WindowServiceFactory
-from helm.common.hierarchical_logger import hlog
 class Adapter(ABC):
@@ -21,48 +18,13 @@ class Adapter(ABC):
     def __init__(self, adapter_spec: AdapterSpec, tokenizer_service: TokenizerService):
         self.adapter_spec: AdapterSpec = adapter_spec
         self.window_service: WindowService = WindowServiceFactory.get_window_service(
-            adapter_spec.model, tokenizer_service
+            adapter_spec.model_deployment, tokenizer_service
         )
     @abstractmethod
-    def adapt(self, instances: List[Instance], parallelism: int) -> ScenarioState:
+    def adapt(self, instances: List[Instance], parallelism: int) -> List[RequestState]:
         """
         Takes a a list of `Instance`s and returns a `ScenarioState` with the
         list of corresponding `RequestState`s.
         """
         pass
-    def get_run_instances(self, instances: List[Instance]) -> List[Instance]:
-        """
-        Get the instances necessary for this run:
-        Train instances (split=train): keep all (if any) for in-context learning
-        Eval instances (split=valid or test): keep at most `max_eval_instances` specified in `AdapterSpec` by sampling
-        Return the resulting train and eval instances.
-        """
-        all_train_instances: List[Instance] = [instance for instance in instances if instance.split == TRAIN_SPLIT]
-        all_eval_instances: List[Instance] = [instance for instance in instances if instance.split in EVAL_SPLITS]
-        if (
-            self.adapter_spec.max_eval_instances is not None
-            and len(all_eval_instances) > self.adapter_spec.max_eval_instances
-        ):
-            # Pick the first `self.adapter_spec.max_eval_instances`.
-            # The random sampling includes instances monotonically.
-            np.random.seed(0)
-            selected_eval_instances = list(
-                np.random.choice(
-                    all_eval_instances,  # type: ignore
-                    self.adapter_spec.max_eval_instances,
-                    replace=False,
-                )
-            )
-        else:
-            selected_eval_instances = all_eval_instances
-        hlog(
-            f"{len(instances)} instances, "
-            f"{len(all_train_instances)} train instances, "
-            f"{len(selected_eval_instances)}/{len(all_eval_instances)} eval instances"
-        )
-        return all_train_instances + selected_eval_instances

helm/benchmark/adaptation/adapters/adapter_factory.py CHANGED Viewed

@@ -1,31 +1,26 @@
-from typing import List
-from helm.benchmark.adaptation.adapter_spec import AdapterSpec
-from helm.benchmark.window_services.tokenizer_service import TokenizerService
-from .adapter import Adapter
-from .generation_adapter import GenerationAdapter
-from .language_modeling_adapter import LanguageModelingAdapter
-from .multiple_choice_joint_adapter import MultipleChoiceJointAdapter
-from .multiple_choice_separate_adapter import MultipleChoiceSeparateAdapter
-from .multiple_choice_calibrated_adapter import MultipleChoiceCalibratedAdapter
-from .binary_ranking_adapter import BinaryRankingAdapter
-from .multimodal.generation_multimodal_adapter import GenerationMultimodalAdapter
-# Adaptation methods
-ADAPT_GENERATION: str = "generation"
-ADAPT_LANGUAGE_MODELING: str = "language_modeling"
-ADAPT_MULTIPLE_CHOICE_JOINT: str = "multiple_choice_joint"
-ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL: str = "multiple_choice_separate_original"
-ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED: str = "multiple_choice_separate_calibrated"
-ADAPT_RANKING_BINARY: str = "ranking_binary"
-ADAPT_MULTIPLE_CHOICE_SEPARATE_METHODS: List[str] = [
-    ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
+from helm.benchmark.adaptation.adapter_spec import (
+    ADAPT_GENERATION,
+    ADAPT_GENERATION_MULTIMODAL,
+    ADAPT_LANGUAGE_MODELING,
+    ADAPT_MULTIPLE_CHOICE_JOINT,
+    ADAPT_MULTIPLE_CHOICE_JOINT_MULTIMODAL,
     ADAPT_MULTIPLE_CHOICE_SEPARATE_CALIBRATED,
-]
-# Multimodal adaptation methods
-ADAPT_GENERATION_MULTIMODAL: str = "generation_multimodal"
+    ADAPT_MULTIPLE_CHOICE_SEPARATE_ORIGINAL,
+    ADAPT_RANKING_BINARY,
+    AdapterSpec,
+)
+from helm.benchmark.adaptation.adapters.adapter import Adapter
+from helm.benchmark.adaptation.adapters.binary_ranking_adapter import BinaryRankingAdapter
+from helm.benchmark.adaptation.adapters.generation_adapter import GenerationAdapter
+from helm.benchmark.adaptation.adapters.language_modeling_adapter import LanguageModelingAdapter
+from helm.benchmark.adaptation.adapters.multimodal.generation_multimodal_adapter import GenerationMultimodalAdapter
+from helm.benchmark.adaptation.adapters.multimodal.multiple_choice_joint_multimodal_adapter import (
+    MultipleChoiceJointMultimodalAdapter,
+)
+from helm.benchmark.adaptation.adapters.multiple_choice_calibrated_adapter import MultipleChoiceCalibratedAdapter
+from helm.benchmark.adaptation.adapters.multiple_choice_joint_adapter import MultipleChoiceJointAdapter
+from helm.benchmark.adaptation.adapters.multiple_choice_separate_adapter import MultipleChoiceSeparateAdapter
+from helm.benchmark.window_services.tokenizer_service import TokenizerService
 class AdapterFactory:
@@ -51,6 +46,8 @@ class AdapterFactory:
             adapter = BinaryRankingAdapter(adapter_spec, tokenizer_service)
         elif method == ADAPT_GENERATION_MULTIMODAL:
             adapter = GenerationMultimodalAdapter(adapter_spec, tokenizer_service)
+        elif method == ADAPT_MULTIPLE_CHOICE_JOINT_MULTIMODAL:
+            adapter = MultipleChoiceJointMultimodalAdapter(adapter_spec, tokenizer_service)
         else:
             raise ValueError(f"Invalid adaptation method: {method}")

helm/benchmark/adaptation/adapters/binary_ranking_adapter.py CHANGED Viewed

@@ -50,6 +50,7 @@ class BinaryRankingAdapter(InContextLearningAdapter):
             )
             request = Request(
                 model=self.adapter_spec.model,
+                model_deployment=self.adapter_spec.model_deployment,
                 prompt=prompt.text,
                 num_completions=self.adapter_spec.num_outputs,
                 temperature=self.adapter_spec.temperature,

helm/benchmark/adaptation/adapters/generation_adapter.py CHANGED Viewed

@@ -39,12 +39,14 @@ class GenerationAdapter(InContextLearningAdapter):
         )
         request = Request(
             model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
             prompt=prompt.text,
             num_completions=self.adapter_spec.num_outputs,
             temperature=self.adapter_spec.temperature,
             max_tokens=self.adapter_spec.max_tokens,
             stop_sequences=self.adapter_spec.stop_sequences,
             random=self.adapter_spec.random,
+            image_generation_parameters=self.adapter_spec.image_generation_parameters,
         )
         request_state = RequestState(
             instance=eval_instance,

helm/benchmark/adaptation/adapters/in_context_learning_adapter.py CHANGED Viewed

@@ -7,9 +7,9 @@ from typing import List, Dict, Optional
 from helm.benchmark.adaptation.prompt import Prompt
 from helm.benchmark.adaptation.request_state import RequestState
-from helm.benchmark.adaptation.scenario_state import ScenarioState
 from helm.benchmark.scenarios.scenario import Instance, TRAIN_SPLIT, EVAL_SPLITS, Reference
 from helm.common.general import parallel_map
+from helm.common.request import Request
 from helm.common.hierarchical_logger import hlog, htrack, htrack_block
 from .adapter import Adapter
@@ -30,7 +30,7 @@ class InContextLearningAdapter(Adapter, ABC):
         pass
     @htrack(None)
-    def adapt(self, instances: List[Instance], parallelism: int) -> ScenarioState:
+    def adapt(self, instances: List[Instance], parallelism: int) -> List[RequestState]:
         """
         Takes a list of `Instance`s and builds a list of corresponding `RequestState`s.
         The reason we don't do this per eval instance is that we create a common set of
@@ -64,7 +64,7 @@ class InContextLearningAdapter(Adapter, ABC):
                 )
         hlog(f"{len(all_request_states)} requests")
-        return ScenarioState(self.adapter_spec, all_request_states)
+        return all_request_states
     def _adapt_trial_index(
         self,
@@ -101,7 +101,23 @@ class InContextLearningAdapter(Adapter, ABC):
                             hlog(line)
         # Flatten and return
-        return [request_state for result in results for request_state in result]
+        all_request_states: List[RequestState] = [request_state for result in results for request_state in result]
+        return self._add_trials(all_request_states)
+    def _add_trials(self, request_states: List[RequestState]) -> List[RequestState]:
+        """Expand the request states by adding trials."""
+        if self.adapter_spec.num_trials <= 1:
+            return request_states
+        all_request_states: List[RequestState] = request_states.copy()
+        for i in range(1, self.adapter_spec.num_trials):
+            seed: str = str(i)
+            for request_state in request_states:
+                request: Request = replace(request_state.request, random=seed)
+                all_request_states.append(replace(request_state, request=request))
+        assert len(all_request_states) == len(request_states) * self.adapter_spec.num_trials
+        return all_request_states
     def sample_examples(
         self, all_train_instances: List[Instance], seed: int, sample_train: bool = True
@@ -214,6 +230,7 @@ class InContextLearningAdapter(Adapter, ABC):
         # Prompt
         prompt = Prompt(
             global_prefix=self.adapter_spec.global_prefix,
+            global_suffix=self.adapter_spec.global_suffix,
             instructions_block=instructions_block,
             train_instance_blocks=train_instance_blocks,
             eval_instance_block=eval_instance_block,

helm/benchmark/adaptation/adapters/language_modeling_adapter.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import List, Tuple, Optional
 from helm.benchmark.adaptation.request_state import RequestState
-from helm.benchmark.adaptation.scenario_state import ScenarioState
 from helm.benchmark.scenarios.scenario import Instance, EVAL_SPLITS
 from helm.benchmark.window_services.window_service import EncodeResult
 from helm.common.general import flatten_list, parallel_map
@@ -26,7 +25,7 @@ class LanguageModelingAdapter(Adapter):
     """
     @htrack(None)
-    def adapt(self, instances: List[Instance], parallelism: int) -> ScenarioState:
+    def adapt(self, instances: List[Instance], parallelism: int) -> List[RequestState]:
         """
         Takes a list of `Instance`s and builds a list of corresponding `RequestState`s.
         Only requires eval instances.
@@ -34,13 +33,19 @@ class LanguageModelingAdapter(Adapter):
         # Pick out evaluation instances. This includes both valid and test splits.
         eval_instances: List[Instance] = [instance for instance in instances if instance.split in EVAL_SPLITS]
         hlog(f"{len(eval_instances)} eval instances")
+        # Since at least 2023-01-01, this adapter was using `instances` instead of `eval_instances`
+        # https://github.com/stanford-crfm/helm/commit/ac9892f7449418d32ab55843702db312b58003ed#diff-69871182494f0d9f4bc6aeea76e99c13edf0213e2c123432a63cd2024d66ffcaR39
+        # This assert is intended to identify run specs (if any) that had been producing incorrect results.
+        assert len(eval_instances) == len(instances), (
+            "Non-evaluation instances were passed to LanguageModelingAdapter, but LanguageModelingAdapter "
+            + "expects evaluation instances only. Please open a GitHub issue with your RunSpec."
+        )
         all_request_states: List[RequestState] = flatten_list(
-            parallel_map(self._generate_requests, instances, parallelism)
+            parallel_map(self._generate_requests, eval_instances, parallelism)
         )
         hlog(f"{len(all_request_states)} requests")
-        return ScenarioState(self.adapter_spec, all_request_states)
+        return all_request_states
     def _generate_requests(self, eval_instance: Instance) -> List[RequestState]:
         """
@@ -114,6 +119,7 @@ class LanguageModelingAdapter(Adapter):
         )
         request = Request(
             model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
             prompt=prompt_text,
             num_completions=1,
             temperature=0,
@@ -162,6 +168,7 @@ class LanguageModelingAdapter(Adapter):
             request = Request(
                 model=self.adapter_spec.model,
+                model_deployment=self.adapter_spec.model_deployment,
                 prompt=prompt_text,
                 num_completions=1,
                 temperature=0,

helm/benchmark/adaptation/adapters/multimodal/generation_multimodal_adapter.py CHANGED Viewed

@@ -29,6 +29,7 @@ class GenerationMultimodalAdapter(InContextLearningMultimodalAdapter):
         request = Request(
             model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
             multimodal_prompt=prompt.multimedia_object,
             num_completions=self.adapter_spec.num_outputs,
             temperature=self.adapter_spec.temperature,

helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py CHANGED Viewed

@@ -27,6 +27,7 @@ class InContextLearningMultimodalAdapter(InContextLearningAdapter, ABC):
         request = Request(
             model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
             multimodal_prompt=prompt.multimedia_object,
             num_completions=self.adapter_spec.num_outputs,
             temperature=self.adapter_spec.temperature,

helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py ADDED Viewed

@@ -0,0 +1,104 @@
+from abc import ABC
+from typing import Dict, List, Optional
+from helm.benchmark.adaptation.request_state import RequestState
+from helm.benchmark.scenarios.scenario import Instance
+from helm.common.media_object import MediaObject, MultimediaObject
+from helm.common.request import Request
+from helm.benchmark.adaptation.adapters.multimodal.in_context_learning_multimodal_adapter import (
+    InContextLearningMultimodalAdapter,
+)
+from .multimodal_prompt import MultimodalPrompt
+class MultipleChoiceJointMultimodalAdapter(InContextLearningMultimodalAdapter, ABC):
+    """
+    An `Adapter`, guided by the `AdapterSpec`, takes a `Scenario` and produces
+    a `ScenarioState`. This `Adapter` has additional logic to support in-context
+    learning for multimodal models.
+    """
+    @staticmethod
+    def get_reference_prefix(prefix: str, i: int) -> str:
+        """
+        Example: prefix = "\nA. ", i = 2, return "\nC. "
+        """
+        return prefix.replace("A", chr(ord("A") + i))
+    def generate_requests(
+        self, eval_instance: Instance, train_trial_index: int, training_instances: List[Instance]
+    ) -> List[RequestState]:
+        prompt: MultimodalPrompt = self.construct_prompt(
+            training_instances, eval_instance, include_output=False, reference_index=None
+        )
+        output_mapping: Dict[str, str] = dict(
+            (self.get_reference_prefix("A", reference_index), reference.output.text)
+            for reference_index, reference in enumerate(eval_instance.references)
+        )
+        request = Request(
+            model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
+            multimodal_prompt=prompt.multimedia_object,
+            num_completions=self.adapter_spec.num_outputs,
+            temperature=self.adapter_spec.temperature,
+            max_tokens=self.adapter_spec.max_tokens,
+            stop_sequences=[],
+            random=self.adapter_spec.random,
+        )
+        request_state = RequestState(
+            instance=eval_instance,
+            reference_index=None,
+            request_mode=None,
+            train_trial_index=train_trial_index,
+            output_mapping=output_mapping,
+            request=request,
+            result=None,
+            num_train_instances=prompt.num_train_instances,
+            prompt_truncated=False,
+        )
+        return [request_state]
+    def construct_example_multimodal_prompt(
+        self, instance: Instance, include_output: bool, reference_index: Optional[int]
+    ) -> MultimediaObject:
+        """
+        Returns a single example of the prompt. `include_output` controls whether the gold output is included.
+        """
+        # Input
+        assert instance.input.multimedia_content is not None
+        result: MultimediaObject = instance.input.multimedia_content.add_textual_prefix(self.adapter_spec.input_prefix)
+        result = result.add_textual_suffix(self.adapter_spec.input_suffix)
+        # Include the references
+        delimiter: str = ", "
+        no_correct_references: str = "n/a"
+        output: str = no_correct_references
+        for reference_index, reference in enumerate(instance.references):
+            prefix = self.get_reference_prefix(self.adapter_spec.reference_prefix, reference_index)
+            if reference.output.multimedia_content is not None:
+                reference_output_content: MultimediaObject = reference.output.multimedia_content
+                reference_output_content = reference_output_content.add_textual_prefix(prefix)
+                reference_output_content = reference_output_content.add_textual_suffix(
+                    self.adapter_spec.reference_suffix
+                )
+                result = result.combine(reference_output_content)
+            else:
+                result = result.add_textual_suffix(prefix + reference.output.text + self.adapter_spec.reference_suffix)
+            if reference.is_correct:
+                if output == no_correct_references:
+                    output = self.get_reference_prefix("A", reference_index)
+                elif self.adapter_spec.multi_label:
+                    output += delimiter
+                    output += self.get_reference_prefix("A", reference_index)
+        if include_output:
+            output_content: MultimediaObject = MultimediaObject([MediaObject(text=output, content_type="text/plain")])
+            output_content = output_content.add_textual_prefix(self.adapter_spec.output_prefix)
+            output_content = output_content.add_textual_suffix(self.adapter_spec.output_suffix)
+            result = result.combine(output_content)
+        else:
+            result = result.add_textual_suffix(self.adapter_spec.output_prefix.rstrip())
+        return result

helm/benchmark/adaptation/adapters/multimodal/test_in_context_learning_multimodal_adapter.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import shutil
 import tempfile
 import unittest
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from helm.common.media_object import MediaObject, MultimediaObject
 from helm.benchmark.scenarios.scenario import Instance, Reference, Input, Output, TEST_SPLIT, TRAIN_SPLIT, CORRECT_TAG
@@ -14,7 +15,7 @@ from .multimodal_prompt import MultimodalPrompt
 class TestInContextLearningMultimodalAdapter(unittest.TestCase):
     def setup_method(self, _):
         self._path: str = tempfile.mkdtemp()
-        self._tokenizer_service = get_tokenizer_service(self._path)
+        self._tokenizer_service = get_tokenizer_service(self._path, BlackHoleCacheBackendConfig())
     def teardown_method(self, _):
         shutil.rmtree(self._path)
@@ -22,6 +23,7 @@ class TestInContextLearningMultimodalAdapter(unittest.TestCase):
     def test_construct_prompt(self):
         adapter_spec: AdapterSpec = AdapterSpec(
             model="simple/model1",
+            model_deployment="simple/model1",
             method=ADAPT_GENERATION_MULTIMODAL,
             global_prefix="[START]",
             instructions="Please answer the following question about the images.",
@@ -91,6 +93,7 @@ class TestInContextLearningMultimodalAdapter(unittest.TestCase):
     def test_construct_prompt_multi_label(self):
         adapter_spec: AdapterSpec = AdapterSpec(
             model="simple/model1",
+            model_deployment="simple/model1",
             method=ADAPT_GENERATION_MULTIMODAL,
             global_prefix="[START]",
             instructions="Please answer the following question about the images.",
@@ -171,6 +174,7 @@ class TestInContextLearningMultimodalAdapter(unittest.TestCase):
         """
         adapter_spec: AdapterSpec = AdapterSpec(
             model="simple/model1",
+            model_deployment="simple/model1",
             method=ADAPT_GENERATION_MULTIMODAL,
             input_prefix="User: ",
             input_suffix="<end_of_utterance>",

helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py CHANGED Viewed

@@ -55,6 +55,7 @@ class MultipleChoiceJointAdapter(InContextLearningAdapter):
         )
         request = Request(
             model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
             prompt=prompt.text,
             num_completions=1,
             top_k_per_token=self.adapter_spec.num_outputs,

helm/benchmark/adaptation/adapters/multiple_choice_separate_adapter.py CHANGED Viewed

@@ -41,6 +41,7 @@ class MultipleChoiceSeparateAdapter(InContextLearningAdapter):
     ) -> RequestState:
         request = Request(
             model=self.adapter_spec.model,
+            model_deployment=self.adapter_spec.model_deployment,
             prompt=prompt.text,
             num_completions=1,
             temperature=0,

helm/benchmark/adaptation/adapters/test_adapter.py CHANGED Viewed

@@ -2,6 +2,7 @@ import shutil
 import tempfile
 from helm.common.authentication import Authentication
+from helm.common.cache_backend_config import BlackHoleCacheBackendConfig
 from helm.proxy.services.server_service import ServerService
 from helm.benchmark.window_services.tokenizer_service import TokenizerService
@@ -13,7 +14,7 @@ class TestAdapter:
     def setup_method(self):
         self.path: str = tempfile.mkdtemp()
-        service = ServerService(base_path=self.path, root_mode=True)
+        service = ServerService(base_path=self.path, root_mode=True, cache_backend_config=BlackHoleCacheBackendConfig())
         self.tokenizer_service = TokenizerService(service, Authentication("test"))
     def teardown_method(self, _):

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl