crfm-helm 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +11 -8
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +67 -38
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +2 -1
- helm/benchmark/__init__.py +13 -0
- helm/benchmark/adaptation/adapter_spec.py +3 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
- helm/benchmark/augmentations/correct_to_misspelling.json +1 -0
- helm/benchmark/contamination/__init__.py +0 -0
- helm/benchmark/metrics/classification_metrics.py +70 -0
- helm/benchmark/metrics/machine_translation_metrics.py +36 -0
- helm/benchmark/metrics/summarization_metrics.py +7 -8
- helm/benchmark/metrics/test_classification_metrics.py +150 -0
- helm/benchmark/presentation/create_plots.py +617 -0
- helm/benchmark/presentation/run_display.py +7 -48
- helm/benchmark/presentation/summarize.py +4 -2
- helm/benchmark/presentation/test_create_plots.py +32 -0
- helm/benchmark/run.py +144 -48
- helm/benchmark/run_expander.py +164 -47
- helm/benchmark/run_specs.py +346 -39
- helm/benchmark/runner.py +34 -6
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +84 -0
- helm/benchmark/scenarios/imdb_listdir.json +50014 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +253 -0
- helm/benchmark/scenarios/lextreme_scenario.py +458 -0
- helm/benchmark/scenarios/me_q_sum_scenario.py +86 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +132 -0
- helm/benchmark/scenarios/med_mcqa_scenario.py +102 -0
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +119 -0
- helm/benchmark/scenarios/med_qa_scenario.py +96 -0
- helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
- helm/benchmark/scenarios/scenario.py +5 -0
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +96 -0
- helm/benchmark/static/benchmarking.css +14 -0
- helm/benchmark/static/benchmarking.js +43 -0
- helm/benchmark/static/index.html +2 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/plot-captions.js +16 -0
- helm/benchmark/static/schema.yaml +154 -1
- helm/benchmark/window_services/cohere_window_service.py +20 -0
- helm/benchmark/window_services/flan_t5_window_service.py +29 -0
- helm/benchmark/window_services/huggingface_window_service.py +39 -0
- helm/benchmark/window_services/santacoder_window_service.py +27 -0
- helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
- helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
- helm/benchmark/window_services/window_service_factory.py +34 -7
- helm/common/codec.py +123 -0
- helm/common/general.py +12 -5
- helm/common/test_codec.py +144 -0
- helm/proxy/clients/aleph_alpha_client.py +47 -28
- helm/proxy/clients/auto_client.py +32 -24
- helm/proxy/clients/google_client.py +88 -0
- helm/proxy/clients/huggingface_client.py +32 -16
- helm/proxy/clients/huggingface_model_registry.py +111 -0
- helm/proxy/clients/huggingface_tokenizer.py +25 -7
- helm/proxy/clients/openai_client.py +60 -2
- helm/proxy/clients/test_huggingface_model_registry.py +57 -0
- helm/proxy/clients/test_huggingface_tokenizer.py +3 -0
- helm/proxy/clients/together_client.py +17 -2
- helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
- helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
- helm/proxy/models.py +115 -7
- helm/proxy/test_models.py +1 -1
- helm/benchmark/presentation/present.py +0 -249
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0

helm/proxy/clients/google_client.py
@@ -0,0 +1,88 @@
+from typing import List, Dict
+
+from helm.common.cache import Cache, CacheConfig
+from helm.common.request import Request, RequestResult, Sequence, Token
+from helm.common.tokenization_request import (
+    TokenizationRequest,
+    TokenizationRequestResult,
+    DecodeRequest,
+    DecodeRequestResult,
+)
+from .client import Client, truncate_sequence
+
+
+class GoogleClient(Client):
+    """
+    Client for the Google models. There isn't an API for their language models.
+    We receive and process completions offline.
+    """
+
+    @staticmethod
+    def convert_to_raw_request(request: Request) -> Dict:
+        return {
+            "best_of": request.top_k_per_token,
+            "echo": request.echo_prompt,
+            "logprobs": request.top_k_per_token,
+            "max_tokens": request.max_tokens,
+            "model": request.model_engine,
+            "n": request.num_completions,
+            "prompt": request.prompt,
+            "request_type": "language-model-inference",
+            "stop": request.stop_sequences or None,
+            "temperature": request.temperature,
+            "top_p": request.top_p,
+        }
+
+    def __init__(self, cache_config: CacheConfig):
+        self.cache = Cache(cache_config)
+
+    def make_request(self, request: Request) -> RequestResult:
+        raw_request = GoogleClient.convert_to_raw_request(request)
+        cache_key: Dict = Client.make_cache_key(raw_request, request)
+
+        try:
+
+            def fail():
+                raise RuntimeError(
+                    f"The result has not been uploaded to the cache for the following request: {cache_key}"
+                )
+
+            # If results are not cached for a given query, fail fast
+            response, cached = self.cache.get(cache_key, fail)
+        except RuntimeError as e:
+            error: str = f"GoogleClient error: {e}"
+            return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
+
+        # Expect the result to be structured the same way as a response from OpenAI API.
+        completions: List[Sequence] = []
+        for raw_completion in response["choices"]:
+            sequence_logprob = 0
+            tokens: List[Token] = []
+
+            raw_data = raw_completion["logprobs"]
+            for text, logprob in zip(raw_data["tokens"], raw_data["token_logprobs"]):
+                tokens.append(Token(text=text, logprob=logprob or 0, top_logprobs=dict()))
+                sequence_logprob += logprob or 0
+
+            completion = Sequence(
+                text=raw_completion["text"],
+                logprob=sequence_logprob,
+                tokens=tokens,
+                finish_reason={"reason": raw_completion["finish_reason"]},
+            )
+            completion = truncate_sequence(completion, request)
+            completions.append(completion)
+
+        return RequestResult(
+            success=True,
+            cached=cached,
+            request_time=response["request_time"],
+            completions=completions,
+            embedding=[],
+        )
+
+    def tokenize(self, request: TokenizationRequest) -> TokenizationRequestResult:
+        raise NotImplementedError("Use the HuggingFaceClient to tokenize.")
+
+    def decode(self, request: DecodeRequest) -> DecodeRequestResult:
+        raise NotImplementedError("Use the HuggingFaceClient to decode.")

helm/proxy/clients/huggingface_client.py
@@ -1,3 +1,4 @@
+from copy import deepcopy
 import torch
 from dataclasses import asdict
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -15,24 +16,29 @@ from helm.common.tokenization_request import (
 )
 from .client import Client, wrap_request_time, truncate_sequence
 from .huggingface_tokenizer import HuggingFaceTokenizers
+from helm.proxy.clients.huggingface_model_registry import HuggingFaceModelConfig, get_huggingface_model_config
 
 
 class HuggingFaceServer:
-    def __init__(self,
+    def __init__(self, model_config: HuggingFaceModelConfig):
         if torch.cuda.is_available():
             hlog("CUDA is available, initializing with a GPU...")
             self.device: str = "cuda:0"
         else:
             self.device = "cpu"
-
-
-
-        with htrack_block("Loading
-        self.
+        model_kwargs = {}
+        if model_config.revision:
+            model_kwargs["revision"] = model_config.revision
+        with htrack_block(f"Loading Hugging Face model for config {model_config}"):
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_config.model_id, trust_remote_code=True, **model_kwargs
+            ).to(self.device)
+        with htrack_block(f"Loading Hugging Face tokenizer model for config {model_config}"):
+            self.tokenizer = AutoTokenizer.from_pretrained(model_config.model_id, **model_kwargs)
 
     def serve_request(self, raw_request: Dict[str, Any]):
         encoded_input = self.tokenizer(raw_request["prompt"], return_tensors="pt").to(self.device)
-
+        raw_request = deepcopy(raw_request)
         raw_request["do_sample"] = True
         raw_request["return_dict_in_generate"] = True
         raw_request["output_scores"] = True
@@ -112,15 +118,25 @@ class HuggingFaceClient(Client):
         self.cache = Cache(cache_config)
         self.model_server_instances: Dict[str, HuggingFaceServer] = {}
 
-    def get_model_server_instance(self,
-        if
-
-
-
-
+    def get_model_server_instance(self, model) -> HuggingFaceServer:
+        if model not in self.model_server_instances:
+            model_config = get_huggingface_model_config(model)
+            if model_config:
+                self.model_server_instances[model] = HuggingFaceServer(model_config)
+            elif model == "EleutherAI/gpt-j-6B":
+                self.model_server_instances[model] = HuggingFaceServer(
+                    HuggingFaceModelConfig.from_string("EleutherAI/gpt-j-6B")
+                )
+            elif model == "huggingface/gpt2":
+                self.model_server_instances[model] = HuggingFaceServer(HuggingFaceModelConfig.from_string("gpt2"))
+            elif model == "bigcode/santacoder":
+                self.model_server_instances[model] = HuggingFaceServer(
+                    HuggingFaceModelConfig.from_string("bigcode/santacoder")
+                )
             else:
-                raise Exception("Unknown model
-
+                raise Exception(f"Unknown HuggingFace model: {model}")
+
+        return self.model_server_instances[model]
 
     def make_request(self, request: Request) -> RequestResult:
         # Embedding not supported for this model
@@ -145,7 +161,7 @@ class HuggingFaceClient(Client):
 
         # Get cached model server instance if possible (to save on model and tokenizer
         # loading times).
-        model_server_instance: HuggingFaceServer = self.get_model_server_instance(request.
+        model_server_instance: HuggingFaceServer = self.get_model_server_instance(request.model)
 
         try:
 

helm/proxy/clients/huggingface_model_registry.py
@@ -0,0 +1,111 @@
+from typing import Dict, Optional
+from dataclasses import dataclass
+import re
+from helm.common.hierarchical_logger import hlog
+from helm.proxy.models import (
+    Model,
+    ALL_MODELS,
+    MODEL_NAME_TO_MODEL,
+    TEXT_MODEL_TAG,
+    FULL_FUNCTIONALITY_TEXT_MODEL_TAG,
+)
+
+
+@dataclass(frozen=True)
+class HuggingFaceModelConfig:
+    namespace: Optional[str]
+    """Name of the group or user that owns the model. e.g. 'stanford-crfm'
+
+    May be None if the model (e.g. gpt2) does not have a namespace."""
+
+    model_name: str
+    """Name of the model. e.g. 'BioMedLM'
+
+    Does not include the namespace."""
+
+    revision: Optional[str]
+    """Revision of the model to use e.g. 'main'.
+
+    If None, use the default revision."""
+
+    @property
+    def model_id(self) -> str:
+        """Return the model ID.
+
+        Examples:
+        - 'gpt2'
+        - 'stanford-crfm/BioMedLM'"""
+        if self.namespace:
+            return f"{self.namespace}/{self.model_name}"
+        return self.model_name
+
+    def __str__(self) -> str:
+        """Return the full model name used by HELM in the format "[namespace/]model_name[@revision]".
+
+        Examples:
+        - 'gpt2'
+        - 'stanford-crfm/BioMedLM'
+        - 'stanford-crfm/BioMedLM@main'"""
+        result = self.model_name
+        if self.namespace:
+            result = f"{self.namespace}/{result}"
+        if self.revision:
+            result = f"{result}@{self.revision}"
+        return result
+
+    @staticmethod
+    def from_string(raw: str) -> "HuggingFaceModelConfig":
+        """Parses a string in the format "[namespace/]model_name[@revision]" to a HuggingFaceModelConfig.
+
+        Examples:
+        - 'gpt2'
+        - 'stanford-crfm/BioMedLM'
+        - 'stanford-crfm/BioMedLM@main'"""
+        pattern = r"((?P<namespace>[^/@]+)/)?(?P<model_name>[^/@]+)(@(?P<revision>[^/@]+))?"
+        match = re.fullmatch(pattern, raw)
+        if not match:
+            raise ValueError(f"Could not parse model name: '{raw}'; Expected format: [namespace/]model_name[@revision]")
+        model_name = match.group("model_name")
+        assert model_name
+        return HuggingFaceModelConfig(
+            namespace=match.group("namespace"), model_name=model_name, revision=match.group("revision")
+        )
+
+
+_huggingface_model_registry: Dict[str, HuggingFaceModelConfig] = {}
+
+
+def register_huggingface_model_config(model_name: str) -> HuggingFaceModelConfig:
+    """Register a AutoModelForCausalLM model from Hugging Face Model Hub for later use.
+
+    model_name format: namespace/model_name[@revision]"""
+    config = HuggingFaceModelConfig.from_string(model_name)
+    if config.model_id in _huggingface_model_registry:
+        raise ValueError(f"A Hugging Face model is already registered for model_id {model_name}")
+    _huggingface_model_registry[model_name] = config
+
+    # HELM model names require a namespace
+    if not config.namespace:
+        raise Exception("Registration of Hugging Face models without a namespace is not supported")
+    if model_name in MODEL_NAME_TO_MODEL:
+        raise ValueError(f"A HELM model is already registered for model name: {model_name}")
+    description = f"HuggingFace model {config.model_id}"
+    if config.revision:
+        description += f" at revision {config.revision}"
+    model = Model(
+        group=config.namespace,
+        name=model_name,
+        display_name=model_name,
+        creator_organization=config.namespace,
+        description=description,
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG],
+    )
+    MODEL_NAME_TO_MODEL[model_name] = model
+    ALL_MODELS.append(model)
+    hlog(f"Registered Hugging Face model: {model} config: {config}")
+    return config
+
+
+def get_huggingface_model_config(model_name: str) -> Optional[HuggingFaceModelConfig]:
+    """Returns a HuggingFaceModelConfig for the model_id."""
+    return _huggingface_model_registry.get(model_name)
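
For reference, a minimal usage sketch of the registry module added above. It is illustrative only: "stanford-crfm/BioMedLM@main" is just an example model string, and the calls mirror the new tests in test_huggingface_model_registry.py.

    from helm.proxy.clients.huggingface_model_registry import (
        HuggingFaceModelConfig,
        register_huggingface_model_config,
        get_huggingface_model_config,
    )

    # Parse "[namespace/]model_name[@revision]" into its parts.
    config = HuggingFaceModelConfig.from_string("stanford-crfm/BioMedLM@main")
    assert config.model_id == "stanford-crfm/BioMedLM"
    assert config.revision == "main"

    # Register the model so HELM clients and run expanders can resolve it by name.
    register_huggingface_model_config("stanford-crfm/BioMedLM@main")
    assert get_huggingface_model_config("stanford-crfm/BioMedLM@main") == config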

helm/proxy/clients/huggingface_tokenizer.py
@@ -1,10 +1,12 @@
 import os
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 from transformers import AutoTokenizer
 
 from helm.common.hierarchical_logger import htrack_block, hlog
 
+from helm.proxy.clients.huggingface_model_registry import get_huggingface_model_config
+
 
 class HuggingFaceTokenizers:
 
@@ -17,8 +19,11 @@ class HuggingFaceTokenizers:
         Returns the tokenizer.
         """
 
-        def load_tokenizer(hf_tokenizer_name: str):
+        def load_tokenizer(hf_tokenizer_name: str, revision: Optional[str] = None):
             """Loads tokenizer using files from disk if they exist. Otherwise, downloads from HuggingFace."""
+            tokenizer_kwargs = {}
+            if revision is not None:
+                tokenizer_kwargs["revision"] = revision
             try:
                 # From the Hugging Face documentation, "local_files_only(defaults to False) —
                 # Whether or not to only look at local files".
@@ -29,10 +34,14 @@ class HuggingFaceTokenizers:
                 # From https://huggingface.co/course/chapter6/3, "slow tokenizers are those written in Python inside
                 # the Hugging Face Transformers library, while the fast versions are the ones provided by Hugging Face
                 # Tokenizers, which are written in Rust." So, use the "fast" version of the tokenizers if available.
-                return AutoTokenizer.from_pretrained(
+                return AutoTokenizer.from_pretrained(
+                    hf_tokenizer_name, local_files_only=True, use_fast=True, **tokenizer_kwargs
+                )
             except OSError:
                 hlog(f"Local files do not exist for HuggingFace tokenizer: {hf_tokenizer_name}. Downloading...")
-                return AutoTokenizer.from_pretrained(
+                return AutoTokenizer.from_pretrained(
+                    hf_tokenizer_name, local_files_only=False, use_fast=True, **tokenizer_kwargs
+                )
 
         if tokenizer_name not in HuggingFaceTokenizers.tokenizers:
             with htrack_block(f"Loading {tokenizer_name} with Hugging Face Transformers"):
@@ -41,7 +50,12 @@ class HuggingFaceTokenizers:
 
                 # Weights are cached at ~/.cache/huggingface/transformers.
                 hf_tokenizer_name: str
-
+                revision: Optional[str] = None
+                model_config = get_huggingface_model_config(tokenizer_name)
+                if model_config:
+                    hf_tokenizer_name = model_config.model_id
+                    revision = model_config.revision
+                elif tokenizer_name == "huggingface/gpt2":
                     hf_tokenizer_name = "gpt2"
                 elif tokenizer_name == "EleutherAI/gpt-j-6B":
                     # Not a typo: Named "gpt-j-6B" instead of "gpt-j-6b" in Hugging Face
@@ -58,10 +72,14 @@ class HuggingFaceTokenizers:
                     hf_tokenizer_name = "t5-11b"
                 elif tokenizer_name == "google/ul2":
                     hf_tokenizer_name = "google/ul2"
+                elif tokenizer_name == "google/flan-t5-xxl":
+                    hf_tokenizer_name = "google/flan-t5-xxl"
+                elif tokenizer_name == "bigcode/santacoder":
+                    hf_tokenizer_name = "bigcode/santacoder"
                 else:
-                    raise ValueError(f"Unsupported tokenizer: {tokenizer_name}")
+                    raise ValueError(f"Unsupported HuggingFace tokenizer: {tokenizer_name}")
 
             # Keep the tokenizer in memory, so we don't recreate it for future requests
-            HuggingFaceTokenizers.tokenizers[tokenizer_name] = load_tokenizer(hf_tokenizer_name)
+            HuggingFaceTokenizers.tokenizers[tokenizer_name] = load_tokenizer(hf_tokenizer_name, revision)
 
         return HuggingFaceTokenizers.tokenizers[tokenizer_name]
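
A small sketch of the resolution path above, assuming network access for the first download: get_tokenizer maps HELM tokenizer names to Hugging Face Hub names (and, for models registered via huggingface_model_registry, a pinned revision), then caches the loaded tokenizer in memory.

    from helm.proxy.clients.huggingface_tokenizer import HuggingFaceTokenizers

    # "huggingface/gpt2" resolves to the Hub tokenizer "gpt2"; repeated calls reuse the cached instance.
    tokenizer = HuggingFaceTokenizers.get_tokenizer("huggingface/gpt2")
    print(tokenizer.tokenize("Hello world"))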

helm/proxy/clients/openai_client.py
@@ -1,5 +1,5 @@
 from dataclasses import replace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, cast
 
 import openai
 
@@ -24,6 +24,7 @@ class OpenAIClient(Client):
         self,
         api_key: str,
         cache_config: CacheConfig,
+        tokenizer_client: Client,
         chat_gpt_client: Optional[ChatGPTClient] = None,
         org_id: Optional[str] = None,
     ):
@@ -31,8 +32,12 @@ class OpenAIClient(Client):
         self.api_key: str = api_key
         self.api_base: str = "https://api.openai.com/v1"
         self.cache = Cache(cache_config)
+        self.tokenizer_client: Client = tokenizer_client
         self.chat_gpt_client: Optional[ChatGPTClient] = chat_gpt_client
 
+    def _is_chat_model_engine(self, model_engine: str):
+        return model_engine.startswith("gpt-3.5")
+
     def make_request(self, request: Request) -> RequestResult:
         if request.model_engine == "chat-gpt":
             assert self.chat_gpt_client is not None
@@ -44,6 +49,28 @@ class OpenAIClient(Client):
                 "input": request.prompt,
                 "engine": request.model_engine,
             }
+        elif self._is_chat_model_engine(request.model_engine):
+            raw_request = {
+                "model": request.model_engine,
+                # For now, put the whole prompt in a single user message, and expect the response
+                # to be returned in a single assistant message.
+                # TODO: Support ChatML for creating multiple messages with different roles.
+                # See: https://github.com/openai/openai-python/blob/main/chatml.md
+                "messages": [{"role": "user", "content": request.prompt}],
+                "temperature": request.temperature,
+                "top_p": request.top_p,
+                "n": request.num_completions,
+                # Note: Setting stop to ["\n"] results in an error
+                # See: https://community.openai.com/t/stop-n-in-gpt-3-5-turbo-leads-to-500-error/87815/15
+                # TODO: Handle this in the adapter.
+                "stop": request.stop_sequences or [],  # API doesn't like empty list
+                # Note: Chat models may require adding an extra token to max_tokens
+                # for the internal special role token.
+                # TODO: Handle this in the adapter.
+                "max_tokens": request.max_tokens,
+                "presence_penalty": request.presence_penalty,
+                "frequency_penalty": request.frequency_penalty,
+            }
         else:
             raw_request = {
                 "engine": request.model_engine,
@@ -74,6 +101,14 @@ class OpenAIClient(Client):
                 openai.api_base = self.api_base
                 return openai.Embedding.create(**raw_request)
 
+        elif self._is_chat_model_engine(request.model_engine):
+
+            def do_it():
+                openai.organization = self.org_id
+                openai.api_key = self.api_key
+                openai.api_base = self.api_base
+                return openai.ChatCompletion.create(**raw_request)
+
         else:
 
             def do_it():
@@ -95,14 +130,37 @@ class OpenAIClient(Client):
         # needs to be populated, and `embedding` should be an empty list and vice-versa.
         embedding: List[float] = []
         completions: List[Sequence] = []
+        tokens: List[Token]
         if request.embedding:
             # If the user is requesting an embedding instead of completion
             # then completions would be left as an empty list. The embedding needs to be set.
             embedding = response["data"][0]["embedding"]
+        elif self._is_chat_model_engine(request.model_engine):
+            for raw_completion in response["choices"]:
+                # The ChatGPT API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
+                raw_completion_content = raw_completion["message"]["content"]
+                text: str = request.prompt + raw_completion_content if request.echo_prompt else raw_completion_content
+                # The ChatGPT API doesn't return us tokens or logprobs, so we tokenize ourselves.
+                tokenization_result: TokenizationRequestResult = self.tokenizer_client.tokenize(
+                    # We're assuming ChatGPT uses the GPT-2 tokenizer.
+                    TokenizationRequest(text, tokenizer="huggingface/gpt2")
+                )
+                # Log probs are not currently not supported by the ChatGPT, so set to 0 for now.
+                tokens = [
+                    Token(text=cast(str, raw_token), logprob=0, top_logprobs={})
+                    for raw_token in tokenization_result.raw_tokens
+                ]
+                completion = Sequence(
+                    text=text,
+                    logprob=0,  # ChatGPT does not provide logprobs
+                    tokens=tokens,
+                    finish_reason={"reason": raw_completion["finish_reason"]},
+                )
+                completions.append(truncate_sequence(completion, request))  # Truncate the text by stop sequences
         else:
             for raw_completion in response["choices"]:
                 sequence_logprob = 0
-                tokens
+                tokens = []
 
                 raw_data = raw_completion["logprobs"]
                 for text, logprob, top_logprobs in zip(
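
Illustratively, for a chat engine (any engine name starting with "gpt-3.5", e.g. "gpt-3.5-turbo"), the client now builds a ChatCompletion-style payload rather than a Completion one, with the whole HELM prompt sent as a single user message. The values below are hypothetical and only show how the Request fields map onto the payload.

    raw_request = {
        "model": "gpt-3.5-turbo",                                      # request.model_engine
        "messages": [{"role": "user", "content": "Q: 2 + 2 =\nA:"}],   # request.prompt as one user message
        "temperature": 0.0,                                            # request.temperature
        "top_p": 1.0,                                                  # request.top_p
        "n": 1,                                                        # request.num_completions
        "stop": [],                                                    # request.stop_sequences or []
        "max_tokens": 5,                                               # request.max_tokens
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
    }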

helm/proxy/clients/test_huggingface_model_registry.py
@@ -0,0 +1,57 @@
+import pytest
+import unittest
+from typing import List, Tuple
+
+from helm.benchmark.run_expander import ModelRunExpander
+from helm.proxy.clients.huggingface_model_registry import (
+    HuggingFaceModelConfig,
+    register_huggingface_model_config,
+    get_huggingface_model_config,
+)
+from helm.proxy.models import get_all_models, get_all_text_models
+
+
+@pytest.mark.parametrize("model_name", ["EleutherAI/pythia-70m"])
+def test_hf_model_register(model_name):
+    register_huggingface_model_config(model_name)
+    assert model_name in ModelRunExpander("all").values
+    assert model_name in get_all_models()
+    assert model_name in get_all_text_models()
+
+
+class TestHuggingFaceModelRegistry(unittest.TestCase):
+    def test_round_trip(self):
+        config_pairs: List[Tuple[str, HuggingFaceModelConfig]] = [
+            ("gpt2", HuggingFaceModelConfig(namespace=None, model_name="gpt2", revision=None)),
+            (
+                "stanford-crfm/BioMedLM",
+                HuggingFaceModelConfig(namespace="stanford-crfm", model_name="BioMedLM", revision=None),
+            ),
+            (
+                "stanford-crfm/BioMedLM@main",
+                HuggingFaceModelConfig(namespace="stanford-crfm", model_name="BioMedLM", revision="main"),
+            ),
+        ]
+        for expected_model_name, expected_model_config in config_pairs:
+            actual_model_config = HuggingFaceModelConfig.from_string(expected_model_name)
+            actual_model_name = str(actual_model_config)
+            self.assertEqual(actual_model_name, expected_model_name)
+            self.assertEqual(actual_model_config, expected_model_config)
+
+    def test_model_id(self):
+        config_pairs: List[Tuple[str, str]] = [
+            ("gpt2", "gpt2"),
+            ("stanford-crfm/BioMedLM", "stanford-crfm/BioMedLM"),
+            ("stanford-crfm/BioMedLM@main", "stanford-crfm/BioMedLM"),
+        ]
+        for expected_model_name, expected_model_id in config_pairs:
+            actual_model_config = HuggingFaceModelConfig.from_string(expected_model_name)
+            self.assertEqual(actual_model_config.model_id, expected_model_id)
+
+    def test_register_huggingface_model_config(self):
+        register_huggingface_model_config("stanford-crfm/BioMedLM@main")
+        expected_model_config = HuggingFaceModelConfig(
+            namespace="stanford-crfm", model_name="BioMedLM", revision="main"
+        )
+        actual_model_config = get_huggingface_model_config("stanford-crfm/BioMedLM@main")
+        self.assertEqual(actual_model_config, expected_model_config)

helm/proxy/clients/test_huggingface_tokenizer.py
@@ -39,6 +39,9 @@ class TestHuggingFaceTokenizers:
     def test_get_tokenizer_ul2(self):
         TestHuggingFaceTokenizers.verify_get_tokenizer("google/ul2", 58)
 
+    def test_get_santacoder(self):
+        TestHuggingFaceTokenizers.verify_get_tokenizer("bigcode/santacoder", 62)
+
     def test_gpt2_tokenize_eos(self):
         eos_token: str = "<|endoftext|>"
         tokenizer = HuggingFaceTokenizers.get_tokenizer("huggingface/gpt2")

helm/proxy/clients/together_client.py
@@ -12,6 +12,22 @@ from helm.common.tokenization_request import (
 from .client import Client, wrap_request_time, truncate_sequence
 
 
+MODEL_ALIASES = {
+    "flan-t5-xxl": "flan-t5-xxl-hf",
+    "h3-2.7b": "h3-2.7b-h3",
+}
+"""Together model name aliases.
+
+HELM users use a shorter model name (e.g. together/flan-t5-xxl)
+whereas the Together client sends and caches requests using
+a longer model name that is suffixed with the implementation framework
+(e.g. flan-t5-xxl-hf). This allows trackcing exactly which
+implementation was used in the cached results, since some results may
+be different depending on the implementation (e.g. efficiency metrics).
+This also allows future migration of results in the case of changes of
+available implementations on Together."""
+
+
 def fix_text(x: str, model: str) -> str:
     """Fix text that comes back from the API."""
     x = x.replace("▁", " ")
@@ -31,8 +47,7 @@ class TogetherClient(Client):
         # Following the examples from https://github.com/togethercomputer/open-models-api
         return {
             "request_type": "language-model-inference",
-
-            "model": request.model,
+            "model": MODEL_ALIASES.get(request.model_engine, request.model_engine),
             "prompt": request.prompt,
             "temperature": request.temperature,
             "n": request.num_completions,
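
A quick illustration of the alias table added above: engine names with a registered alias are rewritten to the implementation-suffixed Together name, and anything else passes through unchanged (the second engine name is a made-up example).

    from helm.proxy.clients.together_client import MODEL_ALIASES

    assert MODEL_ALIASES.get("flan-t5-xxl", "flan-t5-xxl") == "flan-t5-xxl-hf"
    assert MODEL_ALIASES.get("some-other-engine", "some-other-engine") == "some-other-engine"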

helm/proxy/clients/yalm_tokenizer/voc_100b.sp
Binary file

helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py
@@ -1,3 +1,4 @@
+import importlib_resources as resources
 import torch
 import sentencepiece as spm
 
@@ -7,6 +8,10 @@ adapted from https://github.com/yandex/YaLM-100B/blob/main/megatron_lm/megatron/
 """
 
 
+YALM_TOKENIZER_PACKAGE: str = "helm.proxy.clients.yalm_tokenizer"
+YALM_TOKENIZER_VOCAB_FILENAME: str = "voc_100b.sp"
+
+
 def convert_to_unicode(text):
     """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
     if isinstance(text, bytes):
@@ -28,9 +33,10 @@ class YaLMTokenizer:
     MASK_TOKEN = "[MASK]"
     MAX_SEQUENCE_LENGTH = 2048
 
-    def __init__(self
+    def __init__(self):
         self.name = "sp"
-
+        vocab_file_path = str(resources.files(YALM_TOKENIZER_PACKAGE).joinpath(YALM_TOKENIZER_VOCAB_FILENAME))
+        self._tokenizer = spm.SentencePieceProcessor(model_file=vocab_file_path)
         self._vocab_words = self._get_vocab_words()
         self.encoder = {token: idx for idx, token in enumerate(self._vocab_words)}
         self.decoder = {idx: token for idx, token in enumerate(self._vocab_words)}