crfm-helm 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (50)
  1. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +10 -8
  2. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +50 -37
  3. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
  4. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +1 -0
  5. helm/benchmark/__init__.py +2 -0
  6. helm/benchmark/adaptation/adapter_spec.py +3 -0
  7. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
  8. helm/benchmark/contamination/__init__.py +0 -0
  9. helm/benchmark/metrics/classification_metrics.py +28 -23
  10. helm/benchmark/metrics/test_classification_metrics.py +44 -9
  11. helm/benchmark/presentation/create_plots.py +617 -0
  12. helm/benchmark/presentation/summarize.py +4 -2
  13. helm/benchmark/presentation/test_create_plots.py +32 -0
  14. helm/benchmark/run.py +23 -1
  15. helm/benchmark/run_expander.py +161 -47
  16. helm/benchmark/run_specs.py +84 -10
  17. helm/benchmark/runner.py +31 -3
  18. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  19. helm/benchmark/scenarios/imdb_listdir.json +50014 -0
  20. helm/benchmark/scenarios/lex_glue_scenario.py +58 -17
  21. helm/benchmark/scenarios/lextreme_scenario.py +37 -25
  22. helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
  23. helm/benchmark/scenarios/scenario.py +5 -0
  24. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  25. helm/benchmark/static/benchmarking.css +14 -0
  26. helm/benchmark/static/benchmarking.js +43 -0
  27. helm/benchmark/static/index.html +2 -0
  28. helm/benchmark/static/json-urls.js +4 -0
  29. helm/benchmark/static/plot-captions.js +16 -0
  30. helm/benchmark/static/schema.yaml +66 -8
  31. helm/benchmark/window_services/cohere_window_service.py +20 -0
  32. helm/benchmark/window_services/flan_t5_window_service.py +29 -0
  33. helm/benchmark/window_services/huggingface_window_service.py +39 -0
  34. helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
  35. helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
  36. helm/benchmark/window_services/window_service_factory.py +27 -6
  37. helm/common/general.py +12 -5
  38. helm/proxy/clients/aleph_alpha_client.py +47 -28
  39. helm/proxy/clients/auto_client.py +28 -24
  40. helm/proxy/clients/huggingface_client.py +30 -17
  41. helm/proxy/clients/huggingface_model_registry.py +111 -0
  42. helm/proxy/clients/huggingface_tokenizer.py +23 -7
  43. helm/proxy/clients/openai_client.py +60 -2
  44. helm/proxy/clients/test_huggingface_model_registry.py +57 -0
  45. helm/proxy/clients/together_client.py +17 -2
  46. helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
  47. helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
  48. helm/proxy/models.py +82 -2
  49. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
  50. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0
helm/proxy/clients/auto_client.py
@@ -30,6 +30,7 @@ from .microsoft_client import MicrosoftClient
  from .perspective_api_client import PerspectiveAPIClient
  from .yalm_tokenizer_client import YaLMTokenizerClient
  from .simple_client import SimpleClient
+ from helm.proxy.clients.huggingface_model_registry import get_huggingface_model_config


  class AutoClient(Client):
@@ -54,15 +55,17 @@ class AutoClient(Client):
  # TODO: Allow setting CacheConfig.follower_cache_path from a command line flag.
  return SqliteCacheConfig(client_cache_path)

- def get_client(self, request: Request) -> Client:
- """Return a client based on `organization`, creating it if necessary."""
- organization: str = request.model_organization
- client: Optional[Client] = self.clients.get(organization)
+ def _get_client(self, model: str) -> Client:
+ """Return a client based on the model, creating it if necessary."""
+ client: Optional[Client] = self.clients.get(model)

  if client is None:
+ organization: str = model.split("/")[0]
  cache_config: CacheConfig = self._build_cache_config(organization)

- if organization == "openai":
+ if get_huggingface_model_config(model):
+ client = HuggingFaceClient(cache_config=cache_config)
+ elif organization == "openai":
  # TODO: add ChatGPT to the OpenAIClient when it's supported.
  # We're using a separate client for now since we're using an unofficial Python library.
  # See https://github.com/acheong08/ChatGPT/wiki/Setup on how to get a valid session token.
@@ -72,13 +75,14 @@ class AutoClient(Client):
  # TODO: use `cache_config` above. Since this feature is still experimental,
  # save queries and responses in a separate collection.
  cache_config=self._build_cache_config("ChatGPT"),
- tokenizer_client=self.get_tokenizer_client("huggingface"),
+ tokenizer_client=self._get_tokenizer_client("huggingface"),
  )

  org_id = self.credentials.get("openaiOrgId", None)
  client = OpenAIClient(
  api_key=self.credentials["openaiApiKey"],
  cache_config=cache_config,
+ tokenizer_client=self._get_tokenizer_client("huggingface"),
  chat_gpt_client=chat_gpt_client,
  org_id=org_id,
  )
@@ -113,13 +117,13 @@ class AutoClient(Client):
  elif organization == "simple":
  client = SimpleClient(cache_config=cache_config)
  else:
- raise ValueError(f"Unknown organization: {organization}")
- self.clients[organization] = client
+ raise ValueError(f"Could not find client for model: {model}")
+ self.clients[model] = client
  return client

  def make_request(self, request: Request) -> RequestResult:
  """
- Dispatch based on the organization in the name of the model (e.g., openai/davinci).
+ Dispatch based on the the name of the model (e.g., openai/davinci).
  Retries if request fails.
  """

@@ -128,28 +132,30 @@ class AutoClient(Client):
  def make_request_with_retry(client: Client, request: Request) -> RequestResult:
  return client.make_request(request)

- organization: str = request.model_organization
- client: Client = self.get_client(request)
+ client: Client = self._get_client(request.model)

  try:
  return make_request_with_retry(client=client, request=request)
  except RetryError as e:
  last_attempt: Attempt = e.last_attempt
  retry_error: str = (
- f"Failed to make request to {organization} after retrying {last_attempt.attempt_number} times"
+ f"Failed to make request to {request.model} after retrying {last_attempt.attempt_number} times"
  )
  hlog(retry_error)

  # Notify our user that we failed to make the request even after retrying.
  return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")

- def get_tokenizer_client(self, organization: str) -> Client:
- """Return a client based on `organization`, creating it if necessary."""
- client: Optional[Client] = self.tokenizer_clients.get(organization)
+ def _get_tokenizer_client(self, tokenizer: str) -> Client:
+ """Return a client based on the tokenizer, creating it if necessary."""
+ organization: str = tokenizer.split("/")[0]
+ client: Optional[Client] = self.tokenizer_clients.get(tokenizer)

  if client is None:
  cache_config: CacheConfig = self._build_cache_config(organization)
- if organization in [
+ if get_huggingface_model_config(tokenizer):
+ client = HuggingFaceClient(cache_config=cache_config)
+ elif organization in [
  "anthropic",
  "bigscience",
  "bigcode",
@@ -175,19 +181,18 @@ class AutoClient(Client):
  elif organization == "simple":
  client = SimpleClient(cache_config=cache_config)
  else:
- raise ValueError(f"Unknown organization: {organization}")
- self.tokenizer_clients[organization] = client
+ raise ValueError(f"Could not find tokenizer client for model: {tokenizer}")
+ self.tokenizer_clients[tokenizer] = client
  return client

  def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
- """Tokenizes based on the organization in the name of the tokenizer (e.g., huggingface/gpt2)."""
+ """Tokenizes based on the name of the tokenizer (e.g., huggingface/gpt2)."""

  @retry_request
  def tokenize_with_retry(client: Client, request: TokenizationRequest) -> TokenizationRequestResult:
  return client.tokenize(request)

- organization: str = request.tokenizer_organization
- client: Client = self.get_tokenizer_client(organization)
+ client: Client = self._get_tokenizer_client(request.tokenizer)

  try:
  return tokenize_with_retry(client=client, request=request)
@@ -198,14 +203,13 @@ class AutoClient(Client):
  return replace(last_attempt.value, error=f"{retry_error}. Error: {last_attempt.value.error}")

  def decode(self, request: DecodeRequest) -> DecodeRequestResult:
- """Decodes based on the organization in the name of the tokenizer (e.g., huggingface/gpt2)."""
+ """Decodes based on the the name of the tokenizer (e.g., huggingface/gpt2)."""

  @retry_request
  def decode_with_retry(client: Client, request: DecodeRequest) -> DecodeRequestResult:
  return client.decode(request)

- organization: str = request.tokenizer_organization
- client: Client = self.get_tokenizer_client(organization)
+ client: Client = self._get_tokenizer_client(request.tokenizer)

  try:
  return decode_with_retry(client=client, request=request)
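
A note on the change above: AutoClient now caches and dispatches clients by the full model name rather than by organization, checking the Hugging Face model registry first and only then falling back to the organization prefix. A minimal sketch of that lookup order; the helper name describe_dispatch is hypothetical, and "openai/davinci" is the example from the docstring above:

    from helm.proxy.clients.huggingface_model_registry import get_huggingface_model_config

    def describe_dispatch(model: str) -> str:
        # Mirrors AutoClient._get_client: a registered Hugging Face model wins,
        # otherwise fall back to the organization prefix of the model name.
        if get_huggingface_model_config(model):
            return "huggingface"
        return model.split("/")[0]

    print(describe_dispatch("openai/davinci"))  # -> "openai"
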
helm/proxy/clients/huggingface_client.py
@@ -16,20 +16,25 @@ from helm.common.tokenization_request import (
  )
  from .client import Client, wrap_request_time, truncate_sequence
  from .huggingface_tokenizer import HuggingFaceTokenizers
+ from helm.proxy.clients.huggingface_model_registry import HuggingFaceModelConfig, get_huggingface_model_config


  class HuggingFaceServer:
- def __init__(self, model_name: str):
+ def __init__(self, model_config: HuggingFaceModelConfig):
  if torch.cuda.is_available():
  hlog("CUDA is available, initializing with a GPU...")
  self.device: str = "cuda:0"
  else:
  self.device = "cpu"
-
- with htrack_block("Loading model"):
- self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(self.device)
- with htrack_block("Loading tokenizer"):
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model_kwargs = {}
+ if model_config.revision:
+ model_kwargs["revision"] = model_config.revision
+ with htrack_block(f"Loading Hugging Face model for config {model_config}"):
+ self.model = AutoModelForCausalLM.from_pretrained(
+ model_config.model_id, trust_remote_code=True, **model_kwargs
+ ).to(self.device)
+ with htrack_block(f"Loading Hugging Face tokenizer model for config {model_config}"):
+ self.tokenizer = AutoTokenizer.from_pretrained(model_config.model_id, **model_kwargs)

  def serve_request(self, raw_request: Dict[str, Any]):
  encoded_input = self.tokenizer(raw_request["prompt"], return_tensors="pt").to(self.device)
@@ -113,17 +118,25 @@ class HuggingFaceClient(Client):
  self.cache = Cache(cache_config)
  self.model_server_instances: Dict[str, HuggingFaceServer] = {}

- def get_model_server_instance(self, model_engine) -> HuggingFaceServer:
- if model_engine not in self.model_server_instances:
- if model_engine == "gpt-j-6b":
- self.model_server_instances[model_engine] = HuggingFaceServer("EleutherAI/gpt-j-6B")
- elif model_engine == "gpt2":
- self.model_server_instances[model_engine] = HuggingFaceServer("gpt2")
- elif model_engine == "santacoder":
- self.model_server_instances[model_engine] = HuggingFaceServer("bigcode/santacoder")
+ def get_model_server_instance(self, model) -> HuggingFaceServer:
+ if model not in self.model_server_instances:
+ model_config = get_huggingface_model_config(model)
+ if model_config:
+ self.model_server_instances[model] = HuggingFaceServer(model_config)
+ elif model == "EleutherAI/gpt-j-6B":
+ self.model_server_instances[model] = HuggingFaceServer(
+ HuggingFaceModelConfig.from_string("EleutherAI/gpt-j-6B")
+ )
+ elif model == "huggingface/gpt2":
+ self.model_server_instances[model] = HuggingFaceServer(HuggingFaceModelConfig.from_string("gpt2"))
+ elif model == "bigcode/santacoder":
+ self.model_server_instances[model] = HuggingFaceServer(
+ HuggingFaceModelConfig.from_string("bigcode/santacoder")
+ )
  else:
- raise Exception("Unknown model!")
- return self.model_server_instances[model_engine]
+ raise Exception(f"Unknown HuggingFace model: {model}")
+
+ return self.model_server_instances[model]

  def make_request(self, request: Request) -> RequestResult:
  # Embedding not supported for this model
@@ -148,7 +161,7 @@ class HuggingFaceClient(Client):

  # Get cached model server instance if possible (to save on model and tokenizer
  # loading times).
- model_server_instance: HuggingFaceServer = self.get_model_server_instance(request.model_engine)
+ model_server_instance: HuggingFaceServer = self.get_model_server_instance(request.model)

  try:

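
HuggingFaceServer now takes a HuggingFaceModelConfig instead of a bare model name, which is what lets a pinned revision reach from_pretrained. A small sketch of the kwargs it builds, using "stanford-crfm/BioMedLM@main" (an example name from the registry docstrings) and leaving the actual model load commented out:

    from helm.proxy.clients.huggingface_model_registry import HuggingFaceModelConfig

    config = HuggingFaceModelConfig.from_string("stanford-crfm/BioMedLM@main")
    model_kwargs = {}
    if config.revision:
        model_kwargs["revision"] = config.revision
    # AutoModelForCausalLM.from_pretrained(config.model_id, trust_remote_code=True, **model_kwargs)
    print(config.model_id, model_kwargs)  # stanford-crfm/BioMedLM {'revision': 'main'}
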
helm/proxy/clients/huggingface_model_registry.py (new file)
@@ -0,0 +1,111 @@
+ from typing import Dict, Optional
+ from dataclasses import dataclass
+ import re
+ from helm.common.hierarchical_logger import hlog
+ from helm.proxy.models import (
+ Model,
+ ALL_MODELS,
+ MODEL_NAME_TO_MODEL,
+ TEXT_MODEL_TAG,
+ FULL_FUNCTIONALITY_TEXT_MODEL_TAG,
+ )
+
+
+ @dataclass(frozen=True)
+ class HuggingFaceModelConfig:
+ namespace: Optional[str]
+ """Name of the group or user that owns the model. e.g. 'stanford-crfm'
+
+ May be None if the model (e.g. gpt2) does not have a namespace."""
+
+ model_name: str
+ """Name of the model. e.g. 'BioMedLM'
+
+ Does not include the namespace."""
+
+ revision: Optional[str]
+ """Revision of the model to use e.g. 'main'.
+
+ If None, use the default revision."""
+
+ @property
+ def model_id(self) -> str:
+ """Return the model ID.
+
+ Examples:
+ - 'gpt2'
+ - 'stanford-crfm/BioMedLM'"""
+ if self.namespace:
+ return f"{self.namespace}/{self.model_name}"
+ return self.model_name
+
+ def __str__(self) -> str:
+ """Return the full model name used by HELM in the format "[namespace/]model_name[@revision]".
+
+ Examples:
+ - 'gpt2'
+ - 'stanford-crfm/BioMedLM'
+ - 'stanford-crfm/BioMedLM@main'"""
+ result = self.model_name
+ if self.namespace:
+ result = f"{self.namespace}/{result}"
+ if self.revision:
+ result = f"{result}@{self.revision}"
+ return result
+
+ @staticmethod
+ def from_string(raw: str) -> "HuggingFaceModelConfig":
+ """Parses a string in the format "[namespace/]model_name[@revision]" to a HuggingFaceModelConfig.
+
+ Examples:
+ - 'gpt2'
+ - 'stanford-crfm/BioMedLM'
+ - 'stanford-crfm/BioMedLM@main'"""
+ pattern = r"((?P<namespace>[^/@]+)/)?(?P<model_name>[^/@]+)(@(?P<revision>[^/@]+))?"
+ match = re.fullmatch(pattern, raw)
+ if not match:
+ raise ValueError(f"Could not parse model name: '{raw}'; Expected format: [namespace/]model_name[@revision]")
+ model_name = match.group("model_name")
+ assert model_name
+ return HuggingFaceModelConfig(
+ namespace=match.group("namespace"), model_name=model_name, revision=match.group("revision")
+ )
+
+
+ _huggingface_model_registry: Dict[str, HuggingFaceModelConfig] = {}
+
+
+ def register_huggingface_model_config(model_name: str) -> HuggingFaceModelConfig:
+ """Register a AutoModelForCausalLM model from Hugging Face Model Hub for later use.
+
+ model_name format: namespace/model_name[@revision]"""
+ config = HuggingFaceModelConfig.from_string(model_name)
+ if config.model_id in _huggingface_model_registry:
+ raise ValueError(f"A Hugging Face model is already registered for model_id {model_name}")
+ _huggingface_model_registry[model_name] = config
+
+ # HELM model names require a namespace
+ if not config.namespace:
+ raise Exception("Registration of Hugging Face models without a namespace is not supported")
+ if model_name in MODEL_NAME_TO_MODEL:
+ raise ValueError(f"A HELM model is already registered for model name: {model_name}")
+ description = f"HuggingFace model {config.model_id}"
+ if config.revision:
+ description += f" at revision {config.revision}"
+ model = Model(
+ group=config.namespace,
+ name=model_name,
+ display_name=model_name,
+ creator_organization=config.namespace,
+ description=description,
+ tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
+ )
+ MODEL_NAME_TO_MODEL[model_name] = model
+ ALL_MODELS.append(model)
+ hlog(f"Registered Hugging Face model: {model} config: {config}")
+ return config
+
+
+ def get_huggingface_model_config(model_name: str) -> Optional[HuggingFaceModelConfig]:
+ """Returns a HuggingFaceModelConfig for the model_id."""
+ return _huggingface_model_registry.get(model_name)
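
Taken together, the registry above parses a HELM model name of the form [namespace/]model_name[@revision], stores the config for lookup, and registers a matching Model entry (in MODEL_NAME_TO_MODEL and ALL_MODELS) so the rest of the framework can see it. A short usage sketch, reusing the "stanford-crfm/BioMedLM@main" example from the docstrings:

    from helm.proxy.clients.huggingface_model_registry import (
        get_huggingface_model_config,
        register_huggingface_model_config,
    )

    config = register_huggingface_model_config("stanford-crfm/BioMedLM@main")
    assert config.namespace == "stanford-crfm"
    assert config.model_id == "stanford-crfm/BioMedLM"
    assert config.revision == "main"

    # Lookups use the full registered name, including the revision suffix.
    assert get_huggingface_model_config("stanford-crfm/BioMedLM@main") is config
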
helm/proxy/clients/huggingface_tokenizer.py
@@ -1,10 +1,12 @@
  import os
- from typing import Any, Dict
+ from typing import Any, Dict, Optional

  from transformers import AutoTokenizer

  from helm.common.hierarchical_logger import htrack_block, hlog

+ from helm.proxy.clients.huggingface_model_registry import get_huggingface_model_config
+

  class HuggingFaceTokenizers:

@@ -17,8 +19,11 @@ class HuggingFaceTokenizers:
  Returns the tokenizer.
  """

- def load_tokenizer(hf_tokenizer_name: str):
+ def load_tokenizer(hf_tokenizer_name: str, revision: Optional[str] = None):
  """Loads tokenizer using files from disk if they exist. Otherwise, downloads from HuggingFace."""
+ tokenizer_kwargs = {}
+ if revision is not None:
+ tokenizer_kwargs["revision"] = revision
  try:
  # From the Hugging Face documentation, "local_files_only(defaults to False) —
  # Whether or not to only look at local files".
@@ -29,10 +34,14 @@ class HuggingFaceTokenizers:
  # From https://huggingface.co/course/chapter6/3, "slow tokenizers are those written in Python inside
  # the Hugging Face Transformers library, while the fast versions are the ones provided by Hugging Face
  # Tokenizers, which are written in Rust." So, use the "fast" version of the tokenizers if available.
- return AutoTokenizer.from_pretrained(hf_tokenizer_name, local_files_only=True, use_fast=True)
+ return AutoTokenizer.from_pretrained(
+ hf_tokenizer_name, local_files_only=True, use_fast=True, **tokenizer_kwargs
+ )
  except OSError:
  hlog(f"Local files do not exist for HuggingFace tokenizer: {hf_tokenizer_name}. Downloading...")
- return AutoTokenizer.from_pretrained(hf_tokenizer_name, local_files_only=False, use_fast=True)
+ return AutoTokenizer.from_pretrained(
+ hf_tokenizer_name, local_files_only=False, use_fast=True, **tokenizer_kwargs
+ )

  if tokenizer_name not in HuggingFaceTokenizers.tokenizers:
  with htrack_block(f"Loading {tokenizer_name} with Hugging Face Transformers"):
@@ -41,7 +50,12 @@ class HuggingFaceTokenizers:

  # Weights are cached at ~/.cache/huggingface/transformers.
  hf_tokenizer_name: str
- if tokenizer_name == "huggingface/gpt2":
+ revision: Optional[str] = None
+ model_config = get_huggingface_model_config(tokenizer_name)
+ if model_config:
+ hf_tokenizer_name = model_config.model_id
+ revision = model_config.revision
+ elif tokenizer_name == "huggingface/gpt2":
  hf_tokenizer_name = "gpt2"
  elif tokenizer_name == "EleutherAI/gpt-j-6B":
  # Not a typo: Named "gpt-j-6B" instead of "gpt-j-6b" in Hugging Face
@@ -58,12 +72,14 @@ class HuggingFaceTokenizers:
  hf_tokenizer_name = "t5-11b"
  elif tokenizer_name == "google/ul2":
  hf_tokenizer_name = "google/ul2"
+ elif tokenizer_name == "google/flan-t5-xxl":
+ hf_tokenizer_name = "google/flan-t5-xxl"
  elif tokenizer_name == "bigcode/santacoder":
  hf_tokenizer_name = "bigcode/santacoder"
  else:
- raise ValueError(f"Unsupported tokenizer: {tokenizer_name}")
+ raise ValueError(f"Unsupported HuggingFace tokenizer: {tokenizer_name}")

  # Keep the tokenizer in memory, so we don't recreate it for future requests
- HuggingFaceTokenizers.tokenizers[tokenizer_name] = load_tokenizer(hf_tokenizer_name)
+ HuggingFaceTokenizers.tokenizers[tokenizer_name] = load_tokenizer(hf_tokenizer_name, revision)

  return HuggingFaceTokenizers.tokenizers[tokenizer_name]
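
The tokenizer loading above keeps the earlier pattern (prefer local files, fall back to downloading) but now threads an optional revision through to AutoTokenizer. A standalone sketch of that pattern; load_tokenizer_sketch is a hypothetical name and "gpt2" is only an example tokenizer:

    from typing import Optional
    from transformers import AutoTokenizer

    def load_tokenizer_sketch(name: str, revision: Optional[str] = None):
        # Prefer files already on disk; download only if they are missing.
        # Forward the pinned revision when one is configured for the model.
        kwargs = {"revision": revision} if revision else {}
        try:
            return AutoTokenizer.from_pretrained(name, local_files_only=True, use_fast=True, **kwargs)
        except OSError:
            return AutoTokenizer.from_pretrained(name, local_files_only=False, use_fast=True, **kwargs)

    # tokenizer = load_tokenizer_sketch("gpt2")
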
helm/proxy/clients/openai_client.py
@@ -1,5 +1,5 @@
  from dataclasses import replace
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List, Optional, cast

  import openai

@@ -24,6 +24,7 @@ class OpenAIClient(Client):
  self,
  api_key: str,
  cache_config: CacheConfig,
+ tokenizer_client: Client,
  chat_gpt_client: Optional[ChatGPTClient] = None,
  org_id: Optional[str] = None,
  ):
@@ -31,8 +32,12 @@ class OpenAIClient(Client):
  self.api_key: str = api_key
  self.api_base: str = "https://api.openai.com/v1"
  self.cache = Cache(cache_config)
+ self.tokenizer_client: Client = tokenizer_client
  self.chat_gpt_client: Optional[ChatGPTClient] = chat_gpt_client

+ def _is_chat_model_engine(self, model_engine: str):
+ return model_engine.startswith("gpt-3.5")
+
  def make_request(self, request: Request) -> RequestResult:
  if request.model_engine == "chat-gpt":
  assert self.chat_gpt_client is not None
@@ -44,6 +49,28 @@ class OpenAIClient(Client):
  "input": request.prompt,
  "engine": request.model_engine,
  }
+ elif self._is_chat_model_engine(request.model_engine):
+ raw_request = {
+ "model": request.model_engine,
+ # For now, put the whole prompt in a single user message, and expect the response
+ # to be returned in a single assistant message.
+ # TODO: Support ChatML for creating multiple messages with different roles.
+ # See: https://github.com/openai/openai-python/blob/main/chatml.md
+ "messages": [{"role": "user", "content": request.prompt}],
+ "temperature": request.temperature,
+ "top_p": request.top_p,
+ "n": request.num_completions,
+ # Note: Setting stop to ["\n"] results in an error
+ # See: https://community.openai.com/t/stop-n-in-gpt-3-5-turbo-leads-to-500-error/87815/15
+ # TODO: Handle this in the adapter.
+ "stop": request.stop_sequences or [], # API doesn't like empty list
+ # Note: Chat models may require adding an extra token to max_tokens
+ # for the internal special role token.
+ # TODO: Handle this in the adapter.
+ "max_tokens": request.max_tokens,
+ "presence_penalty": request.presence_penalty,
+ "frequency_penalty": request.frequency_penalty,
+ }
  else:
  raw_request = {
  "engine": request.model_engine,
@@ -74,6 +101,14 @@ class OpenAIClient(Client):
  openai.api_base = self.api_base
  return openai.Embedding.create(**raw_request)

+ elif self._is_chat_model_engine(request.model_engine):
+
+ def do_it():
+ openai.organization = self.org_id
+ openai.api_key = self.api_key
+ openai.api_base = self.api_base
+ return openai.ChatCompletion.create(**raw_request)
+
  else:

  def do_it():
@@ -95,14 +130,37 @@ class OpenAIClient(Client):
  # needs to be populated, and `embedding` should be an empty list and vice-versa.
  embedding: List[float] = []
  completions: List[Sequence] = []
+ tokens: List[Token]
  if request.embedding:
  # If the user is requesting an embedding instead of completion
  # then completions would be left as an empty list. The embedding needs to be set.
  embedding = response["data"][0]["embedding"]
+ elif self._is_chat_model_engine(request.model_engine):
+ for raw_completion in response["choices"]:
+ # The ChatGPT API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
+ raw_completion_content = raw_completion["message"]["content"]
+ text: str = request.prompt + raw_completion_content if request.echo_prompt else raw_completion_content
+ # The ChatGPT API doesn't return us tokens or logprobs, so we tokenize ourselves.
+ tokenization_result: TokenizationRequestResult = self.tokenizer_client.tokenize(
+ # We're assuming ChatGPT uses the GPT-2 tokenizer.
+ TokenizationRequest(text, tokenizer="huggingface/gpt2")
+ )
+ # Log probs are not currently not supported by the ChatGPT, so set to 0 for now.
+ tokens = [
+ Token(text=cast(str, raw_token), logprob=0, top_logprobs={})
+ for raw_token in tokenization_result.raw_tokens
+ ]
+ completion = Sequence(
+ text=text,
+ logprob=0, # ChatGPT does not provide logprobs
+ tokens=tokens,
+ finish_reason={"reason": raw_completion["finish_reason"]},
+ )
+ completions.append(truncate_sequence(completion, request)) # Truncate the text by stop sequences
  else:
  for raw_completion in response["choices"]:
  sequence_logprob = 0
- tokens: List[Token] = []
+ tokens = []

  raw_data = raw_completion["logprobs"]
  for text, logprob, top_logprobs in zip(
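
The new chat branch above sends a messages-style payload through openai.ChatCompletion.create (the pre-1.0 openai client interface used in this file) and reads the completion back from each choice's message content. A minimal sketch of such a payload; "gpt-3.5-turbo" is only an illustrative engine name matching the "gpt-3.5" prefix check, and the actual call is left commented out:

    import openai

    raw_request = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say hello."}],
        "temperature": 0.0,
        "top_p": 1.0,
        "n": 1,
        "stop": [],
        "max_tokens": 16,
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
    }
    # response = openai.ChatCompletion.create(**raw_request)
    # text = response["choices"][0]["message"]["content"]
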
helm/proxy/clients/test_huggingface_model_registry.py (new file)
@@ -0,0 +1,57 @@
+ import pytest
+ import unittest
+ from typing import List, Tuple
+
+ from helm.benchmark.run_expander import ModelRunExpander
+ from helm.proxy.clients.huggingface_model_registry import (
+ HuggingFaceModelConfig,
+ register_huggingface_model_config,
+ get_huggingface_model_config,
+ )
+ from helm.proxy.models import get_all_models, get_all_text_models
+
+
+ @pytest.mark.parametrize("model_name", ["EleutherAI/pythia-70m"])
+ def test_hf_model_register(model_name):
+ register_huggingface_model_config(model_name)
+ assert model_name in ModelRunExpander("all").values
+ assert model_name in get_all_models()
+ assert model_name in get_all_text_models()
+
+
+ class TestHuggingFaceModelRegistry(unittest.TestCase):
+ def test_round_trip(self):
+ config_pairs: List[Tuple[str, HuggingFaceModelConfig]] = [
+ ("gpt2", HuggingFaceModelConfig(namespace=None, model_name="gpt2", revision=None)),
+ (
+ "stanford-crfm/BioMedLM",
+ HuggingFaceModelConfig(namespace="stanford-crfm", model_name="BioMedLM", revision=None),
+ ),
+ (
+ "stanford-crfm/BioMedLM@main",
+ HuggingFaceModelConfig(namespace="stanford-crfm", model_name="BioMedLM", revision="main"),
+ ),
+ ]
+ for expected_model_name, expected_model_config in config_pairs:
+ actual_model_config = HuggingFaceModelConfig.from_string(expected_model_name)
+ actual_model_name = str(actual_model_config)
+ self.assertEqual(actual_model_name, expected_model_name)
+ self.assertEqual(actual_model_config, expected_model_config)
+
+ def test_model_id(self):
+ config_pairs: List[Tuple[str, str]] = [
+ ("gpt2", "gpt2"),
+ ("stanford-crfm/BioMedLM", "stanford-crfm/BioMedLM"),
+ ("stanford-crfm/BioMedLM@main", "stanford-crfm/BioMedLM"),
+ ]
+ for expected_model_name, expected_model_id in config_pairs:
+ actual_model_config = HuggingFaceModelConfig.from_string(expected_model_name)
+ self.assertEqual(actual_model_config.model_id, expected_model_id)
+
+ def test_register_huggingface_model_config(self):
+ register_huggingface_model_config("stanford-crfm/BioMedLM@main")
+ expected_model_config = HuggingFaceModelConfig(
+ namespace="stanford-crfm", model_name="BioMedLM", revision="main"
+ )
+ actual_model_config = get_huggingface_model_config("stanford-crfm/BioMedLM@main")
+ self.assertEqual(actual_model_config, expected_model_config)
helm/proxy/clients/together_client.py
@@ -12,6 +12,22 @@ from helm.common.tokenization_request import (
  from .client import Client, wrap_request_time, truncate_sequence


+ MODEL_ALIASES = {
+ "flan-t5-xxl": "flan-t5-xxl-hf",
+ "h3-2.7b": "h3-2.7b-h3",
+ }
+ """Together model name aliases.
+
+ HELM users use a shorter model name (e.g. together/flan-t5-xxl)
+ whereas the Together client sends and caches requests using
+ a longer model name that is suffixed with the implementation framework
+ (e.g. flan-t5-xxl-hf). This allows trackcing exactly which
+ implementation was used in the cached results, since some results may
+ be different depending on the implementation (e.g. efficiency metrics).
+ This also allows future migration of results in the case of changes of
+ available implementations on Together."""
+
+
  def fix_text(x: str, model: str) -> str:
  """Fix text that comes back from the API."""
  x = x.replace("▁", " ")
@@ -31,8 +47,7 @@ class TogetherClient(Client):
  # Following the examples from https://github.com/togethercomputer/open-models-api
  return {
  "request_type": "language-model-inference",
- # TODO: the Together API expects "together/" in the model name
- "model": request.model,
+ "model": MODEL_ALIASES.get(request.model_engine, request.model_engine),
  "prompt": request.prompt,
  "temperature": request.temperature,
  "n": request.num_completions,
helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py
@@ -1,3 +1,4 @@
+ import importlib_resources as resources
  import torch
  import sentencepiece as spm

@@ -7,6 +8,10 @@ adapted from https://github.com/yandex/YaLM-100B/blob/main/megatron_lm/megatron/
  """


+ YALM_TOKENIZER_PACKAGE: str = "helm.proxy.clients.yalm_tokenizer"
+ YALM_TOKENIZER_VOCAB_FILENAME: str = "voc_100b.sp"
+
+
  def convert_to_unicode(text):
  """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
  if isinstance(text, bytes):
@@ -28,9 +33,10 @@ class YaLMTokenizer:
  MASK_TOKEN = "[MASK]"
  MAX_SEQUENCE_LENGTH = 2048

- def __init__(self, vocab_file="src/helm/proxy/clients/yalm_tokenizer/voc_100b.sp"):
+ def __init__(self):
  self.name = "sp"
- self._tokenizer = spm.SentencePieceProcessor(model_file=vocab_file)
+ vocab_file_path = str(resources.files(YALM_TOKENIZER_PACKAGE).joinpath(YALM_TOKENIZER_VOCAB_FILENAME))
+ self._tokenizer = spm.SentencePieceProcessor(model_file=vocab_file_path)
  self._vocab_words = self._get_vocab_words()
  self.encoder = {token: idx for idx, token in enumerate(self._vocab_words)}
  self.decoder = {idx: token for idx, token in enumerate(self._vocab_words)}
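
The constructor change above swaps a hard-coded source-tree path for a lookup of the vocabulary file inside the installed package via importlib_resources. A minimal sketch of that resolution, using the package and filename constants from the diff:

    import importlib_resources as resources

    # Resolve a data file that ships inside an installed package,
    # rather than relying on a "src/helm/..." checkout path.
    vocab_file_path = str(
        resources.files("helm.proxy.clients.yalm_tokenizer").joinpath("voc_100b.sp")
    )
    print(vocab_file_path)
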