crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
- helm/benchmark/annotation/call_center_annotator.py +247 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +68 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +71 -0
- helm/benchmark/annotation/medication_qa_annotator.py +68 -0
- helm/benchmark/annotation/model_as_judge.py +45 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
- helm/benchmark/annotation/xstest_annotator.py +110 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/safety_metrics.py +57 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +31 -2
- helm/benchmark/run_expander.py +113 -10
- helm/benchmark/run_spec_factory.py +4 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
- helm/benchmark/run_specs/experimental_run_specs.py +85 -0
- helm/benchmark/run_specs/finance_run_specs.py +110 -0
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +251 -57
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +189 -0
- helm/benchmark/static/schema_image2struct.yaml +588 -0
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_safety.yaml +247 -0
- helm/benchmark/static/schema_tables.yaml +317 -0
- helm/benchmark/static/schema_thai.yaml +244 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
- helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +50 -28
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +79 -19
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +11 -5
- helm/clients/palmyra_client.py +25 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +7 -9
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +129 -23
- helm/clients/vertexai_client.py +62 -18
- helm/clients/vision_language/huggingface_vlm_client.py +1 -0
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +99 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +25 -0
- helm/common/mongo_key_value_store.py +2 -1
- helm/common/request.py +16 -0
- helm/config/model_deployments.yaml +740 -363
- helm/config/model_metadata.yaml +824 -128
- helm/config/tokenizer_configs.yaml +207 -10
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/example_queries.py +14 -21
- helm/proxy/services/server_service.py +2 -3
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +29 -62
- helm/tokenizers/huggingface_tokenizer.py +35 -13
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/schema_image2structure.yaml +0 -304
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
- helm/benchmark/static_build/assets/index-878a1094.css +0 -1
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
|
@@ -1,83 +1,50 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
import cohere
|
|
4
|
+
from cohere.manually_maintained.tokenizers import get_hf_tokenizer
|
|
4
5
|
|
|
5
6
|
from helm.common.cache import CacheConfig
|
|
6
7
|
from helm.common.tokenization_request import (
|
|
7
8
|
TokenizationRequest,
|
|
8
|
-
DecodeRequest,
|
|
9
|
-
DecodeRequestResult,
|
|
10
9
|
TokenizationToken,
|
|
11
10
|
)
|
|
12
|
-
from helm.
|
|
13
|
-
from .caching_tokenizer import CachingTokenizer
|
|
14
|
-
|
|
11
|
+
from helm.tokenizers.caching_tokenizer import CachingTokenizer
|
|
15
12
|
|
|
16
|
-
class CohereTokenizer(CachingTokenizer):
|
|
17
|
-
# From "https://docs.cohere.ai/versioning-reference",
|
|
18
|
-
# "this version [2021-11-08] introduces multiple generations, meaning that the generations endpoint will
|
|
19
|
-
# now accept a num_generations argument in the JSON and will always return an array of generations"
|
|
20
|
-
# Note that the API version is decoupled from the model version.
|
|
21
|
-
DEFAULT_API_VERSION: str = "2021-11-08"
|
|
22
13
|
|
|
23
|
-
|
|
14
|
+
class CohereLocalTokenizer(CachingTokenizer):
|
|
15
|
+
"""Cohere tokenizer using the Cohere Python library."""
|
|
24
16
|
|
|
25
|
-
|
|
26
|
-
# be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters."
|
|
27
|
-
# However, even sending a request with 60,000 characters sometimes fails, so we set the
|
|
28
|
-
# maximum length to 50,000, which is about 8,333 tokens.
|
|
29
|
-
# TODO: followed up with Cohere support with an example of a failure case
|
|
30
|
-
TOKENIZE_API_MAX_TEXT_LENGTH: int = 50_000
|
|
31
|
-
|
|
32
|
-
def __init__(self, api_key: str, cache_config: CacheConfig) -> None:
|
|
17
|
+
def __init__(self, api_key: Optional[str], cache_config: CacheConfig) -> None:
|
|
33
18
|
super().__init__(cache_config)
|
|
34
|
-
self.
|
|
19
|
+
self.client = cohere.Client(api_key)
|
|
35
20
|
|
|
36
21
|
def _tokenization_request_to_cache_key(self, request: TokenizationRequest) -> Dict[str, Any]:
|
|
37
|
-
|
|
38
|
-
return {"text": request.text}
|
|
22
|
+
return {"text": request.text, "tokenizer": request.tokenizer}
|
|
39
23
|
|
|
40
24
|
def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
|
41
|
-
"""
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
"token_strings": ["token", "ize", " me", "!", " :", "D"]
|
|
49
|
-
}
|
|
50
|
-
"""
|
|
51
|
-
text: str = request["text"]
|
|
52
|
-
assert (
|
|
53
|
-
1 <= len(text) <= CohereTokenizer.TOKENIZE_API_MAX_TEXT_LENGTH
|
|
54
|
-
), f"Invalid text length: {len(text)}. Valid length: [1..{CohereTokenizer.TOKENIZE_API_MAX_TEXT_LENGTH:,d}]"
|
|
55
|
-
|
|
56
|
-
response = requests.request(
|
|
57
|
-
method="POST",
|
|
58
|
-
url=get_cohere_url(CohereTokenizer.TOKENIZE_ENDPOINT),
|
|
59
|
-
headers={
|
|
60
|
-
"Authorization": f"BEARER {self.api_key}",
|
|
61
|
-
"Content-Type": "application/json",
|
|
62
|
-
"Cohere-Version": DEFAULT_COHERE_API_VERSION,
|
|
63
|
-
},
|
|
64
|
-
data=json.dumps(request),
|
|
25
|
+
model: str = request["tokenizer"].split("/")[1]
|
|
26
|
+
# Workaround for https://github.com/cohere-ai/cohere-python/issues/493
|
|
27
|
+
# `token_strings` are always set to `[]`, so we have to populate it ourselves.
|
|
28
|
+
response = self.client.tokenize(text=request["text"], model=model)
|
|
29
|
+
response_dict = response.dict()
|
|
30
|
+
response_dict["token_strings"] = get_hf_tokenizer(self.client, model).decode_batch(
|
|
31
|
+
[[token] for token in response.tokens]
|
|
65
32
|
)
|
|
66
|
-
|
|
67
|
-
assert "message" not in result.keys(), f"Request failed with error {result['message']}"
|
|
68
|
-
assert "tokens" in result and "token_strings" in result, f"Invalid response: {result}"
|
|
69
|
-
# This output format is used to preserve our existing Cache (10/17/2023)
|
|
70
|
-
return result
|
|
33
|
+
return response_dict
|
|
71
34
|
|
|
72
35
|
def _tokenization_raw_response_to_tokens(
|
|
73
36
|
self, response: Dict[str, Any], request: TokenizationRequest
|
|
74
37
|
) -> List[TokenizationToken]:
|
|
75
|
-
tokens =
|
|
76
|
-
|
|
38
|
+
tokens: List[TokenizationToken] = []
|
|
39
|
+
if request.encode:
|
|
40
|
+
tokens = [TokenizationToken(token) for token in response["tokens"]]
|
|
41
|
+
else:
|
|
42
|
+
tokens = [TokenizationToken(token) for token in response["token_strings"]]
|
|
43
|
+
if request.truncation:
|
|
44
|
+
tokens = tokens[: request.max_length]
|
|
45
|
+
return tokens
|
|
77
46
|
|
|
78
47
|
def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def decode(self, request: DecodeRequest) -> DecodeRequestResult:
|
|
83
|
-
raise NotImplementedError("The Cohere API does not support decoding.")
|
|
48
|
+
model: str = request["tokenizer"].split("/")[1]
|
|
49
|
+
response = self.client.detokenize(tokens=request["tokens"], model=model)
|
|
50
|
+
return response.dict()
|
|
@@ -29,8 +29,17 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
29
29
|
_tokenizers: Dict[str, WrappedPreTrainedTokenizer] = {}
|
|
30
30
|
_tokenizers_lock: Lock = Lock()
|
|
31
31
|
|
|
32
|
-
def __init__(
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
cache_config: CacheConfig,
|
|
35
|
+
tokenizer_name: str,
|
|
36
|
+
pretrained_model_name_or_path: Optional[str] = None,
|
|
37
|
+
**kwargs,
|
|
38
|
+
):
|
|
33
39
|
super().__init__(cache_config=cache_config)
|
|
40
|
+
self._helm_tokenizer_name = (
|
|
41
|
+
tokenizer_name # HELM tokenizer name (e.g. "huggingface/gpt2"), *not* Hugging Face Hub Model ID
|
|
42
|
+
)
|
|
34
43
|
self._pretrained_model_name_or_path = pretrained_model_name_or_path
|
|
35
44
|
self._kwargs = kwargs
|
|
36
45
|
|
|
@@ -40,7 +49,10 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
40
49
|
# To avoid deadlocks when using HuggingFace tokenizers with multiple processes
|
|
41
50
|
# TODO: Figure out if we actually need this.
|
|
42
51
|
os.environ["TOKENIZERS_PARALLELISM"] = "False"
|
|
43
|
-
|
|
52
|
+
from_pretrained_kwargs = {**kwargs}
|
|
53
|
+
# If unspecified, set `use_fast=True` by default.
|
|
54
|
+
if "use_fast" not in from_pretrained_kwargs:
|
|
55
|
+
from_pretrained_kwargs["use_fast"] = True
|
|
44
56
|
try:
|
|
45
57
|
# From the Hugging Face documentation, "local_files_only(defaults to False) —
|
|
46
58
|
# Whether or not to only look at local files".
|
|
@@ -53,14 +65,14 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
53
65
|
# Tokenizers, which are written in Rust." So, use the "fast" version of the tokenizers if available.
|
|
54
66
|
return WrappedPreTrainedTokenizer(
|
|
55
67
|
AutoTokenizer.from_pretrained(
|
|
56
|
-
pretrained_model_name_or_path, local_files_only=True,
|
|
68
|
+
pretrained_model_name_or_path, local_files_only=True, **from_pretrained_kwargs
|
|
57
69
|
)
|
|
58
70
|
)
|
|
59
71
|
except OSError:
|
|
60
72
|
hlog(f"Local files do not exist for HuggingFace tokenizer: {pretrained_model_name_or_path}. Downloading...")
|
|
61
73
|
return WrappedPreTrainedTokenizer(
|
|
62
74
|
AutoTokenizer.from_pretrained(
|
|
63
|
-
pretrained_model_name_or_path, local_files_only=False,
|
|
75
|
+
pretrained_model_name_or_path, local_files_only=False, **from_pretrained_kwargs
|
|
64
76
|
)
|
|
65
77
|
)
|
|
66
78
|
|
|
@@ -84,21 +96,26 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
84
96
|
)
|
|
85
97
|
return HuggingFaceTokenizer._tokenizers[helm_tokenizer_name]
|
|
86
98
|
|
|
87
|
-
def
|
|
88
|
-
"""
|
|
99
|
+
def get_wrapped_tokenizer(self) -> WrappedPreTrainedTokenizer:
|
|
100
|
+
"""Get the underlying Hugging Face WrappedPreTrainedTokenizer."""
|
|
89
101
|
pretrained_model_name_or_path = (
|
|
90
|
-
self._pretrained_model_name_or_path if self._pretrained_model_name_or_path else
|
|
102
|
+
self._pretrained_model_name_or_path if self._pretrained_model_name_or_path else self._helm_tokenizer_name
|
|
91
103
|
)
|
|
92
104
|
return HuggingFaceTokenizer.get_tokenizer(
|
|
93
|
-
helm_tokenizer_name=
|
|
105
|
+
helm_tokenizer_name=self._helm_tokenizer_name,
|
|
94
106
|
pretrained_model_name_or_path=pretrained_model_name_or_path,
|
|
95
107
|
**self._kwargs,
|
|
96
108
|
)
|
|
97
109
|
|
|
98
110
|
def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
|
111
|
+
if request["tokenizer"] != self._helm_tokenizer_name:
|
|
112
|
+
raise ValueError(
|
|
113
|
+
f"This HuggingFaceTokenizer expects tokenizer to be {self._helm_tokenizer_name} "
|
|
114
|
+
"but instead the request has tokenizer {request['tokenizer']}"
|
|
115
|
+
)
|
|
99
116
|
if request["encode"]:
|
|
100
117
|
if request["truncation"]:
|
|
101
|
-
with self.
|
|
118
|
+
with self.get_wrapped_tokenizer() as tokenizer:
|
|
102
119
|
tokens = tokenizer.encode(
|
|
103
120
|
request["text"],
|
|
104
121
|
truncation=request["truncation"],
|
|
@@ -106,7 +123,7 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
106
123
|
add_special_tokens=False,
|
|
107
124
|
)
|
|
108
125
|
else:
|
|
109
|
-
with self.
|
|
126
|
+
with self.get_wrapped_tokenizer() as tokenizer:
|
|
110
127
|
tokens = tokenizer.encode(request["text"], add_special_tokens=False)
|
|
111
128
|
else:
|
|
112
129
|
if "gpt" in request["tokenizer"] or request["tokenizer"] in [
|
|
@@ -118,7 +135,7 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
118
135
|
# convert_tokens_to_string method. We prefer to use this method instead
|
|
119
136
|
# of the hacky cleanup_tokens method below as it might handle cases
|
|
120
137
|
# we haven't thought of in cleanup_tokens.
|
|
121
|
-
with self.
|
|
138
|
+
with self.get_wrapped_tokenizer() as tokenizer:
|
|
122
139
|
tokens = [
|
|
123
140
|
tokenizer.convert_tokens_to_string([token]) for token in tokenizer.tokenize(request["text"])
|
|
124
141
|
]
|
|
@@ -131,7 +148,7 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
131
148
|
# But this replaces all the "▁" characters by "", which is not what we want.
|
|
132
149
|
# This would be problematic as tokenize(" Hello", encode=False) would return ["Hello"]
|
|
133
150
|
# Just like tokenize("Hello", encode=False) would return ["Hello"].
|
|
134
|
-
with self.
|
|
151
|
+
with self.get_wrapped_tokenizer() as tokenizer:
|
|
135
152
|
tokens = tokenizer.tokenize(request["text"])
|
|
136
153
|
# Some tokenizers (e.g. Qwen/Qwen-7B) return the tokens as bytes, so we have to decode them to strings.
|
|
137
154
|
if tokens and type(tokens[0]) == bytes:
|
|
@@ -140,7 +157,12 @@ class HuggingFaceTokenizer(CachingTokenizer):
|
|
|
140
157
|
return {"tokens": tokens}
|
|
141
158
|
|
|
142
159
|
def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
|
|
143
|
-
|
|
160
|
+
if request["tokenizer"] != self._helm_tokenizer_name:
|
|
161
|
+
raise ValueError(
|
|
162
|
+
f"This HuggingFaceTokenizer expects tokenizer to be {self._helm_tokenizer_name} "
|
|
163
|
+
"but instead the request has tokenizer {request['tokenizer']}"
|
|
164
|
+
)
|
|
165
|
+
with self.get_wrapped_tokenizer() as tokenizer:
|
|
144
166
|
text = tokenizer.decode(
|
|
145
167
|
request["tokens"], clean_up_tokenization_spaces=request["clean_up_tokenization_spaces"]
|
|
146
168
|
)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from helm.common.cache import BlackHoleCacheConfig
|
|
4
|
+
from helm.common.tokenization_request import (
|
|
5
|
+
DecodeRequest,
|
|
6
|
+
TokenizationRequest,
|
|
7
|
+
TokenizationToken,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.mark.models
|
|
12
|
+
def test_tokenize():
|
|
13
|
+
from helm.tokenizers.ai21_tokenizer import AI21LocalTokenizer
|
|
14
|
+
|
|
15
|
+
tokenizer = AI21LocalTokenizer(cache_config=BlackHoleCacheConfig())
|
|
16
|
+
request = TokenizationRequest(tokenizer="ai21/jamba-instruct-tokenizer", text="otter 🦦")
|
|
17
|
+
result = tokenizer.tokenize(request)
|
|
18
|
+
assert result.success
|
|
19
|
+
assert not result.cached
|
|
20
|
+
assert result.tokens == [
|
|
21
|
+
TokenizationToken(token) for token in ["ot", "ter", "▁", "<0xF0>", "<0x9F>", "<0xA6>", "<0xA6>"]
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.mark.models
|
|
26
|
+
def test_encode():
|
|
27
|
+
from helm.tokenizers.ai21_tokenizer import AI21LocalTokenizer
|
|
28
|
+
|
|
29
|
+
tokenizer = AI21LocalTokenizer(cache_config=BlackHoleCacheConfig())
|
|
30
|
+
request = TokenizationRequest(tokenizer="ai21/jamba-instruct-tokenizer", text="otter 🦦", encode=True)
|
|
31
|
+
result = tokenizer.tokenize(request)
|
|
32
|
+
assert result.success
|
|
33
|
+
assert not result.cached
|
|
34
|
+
assert result.tokens == [TokenizationToken(token) for token in [1860, 1901, 62934, 1784, 1703, 1710, 1710]]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.mark.models
|
|
38
|
+
def test_decode():
|
|
39
|
+
from helm.tokenizers.ai21_tokenizer import AI21LocalTokenizer
|
|
40
|
+
|
|
41
|
+
tokenizer = AI21LocalTokenizer(cache_config=BlackHoleCacheConfig())
|
|
42
|
+
request = DecodeRequest(
|
|
43
|
+
tokenizer="ai21/jamba-instruct-tokenizer", tokens=[1860, 1901, 62934, 1784, 1703, 1710, 1710]
|
|
44
|
+
)
|
|
45
|
+
result = tokenizer.decode(request)
|
|
46
|
+
assert result.success
|
|
47
|
+
assert not result.cached
|
|
48
|
+
assert result.text == "otter 🦦"
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from helm.common.cache import BlackHoleCacheConfig
|
|
4
|
+
from helm.common.tokenization_request import (
|
|
5
|
+
DecodeRequest,
|
|
6
|
+
TokenizationRequest,
|
|
7
|
+
TokenizationToken,
|
|
8
|
+
)
|
|
9
|
+
from helm.tokenizers.cohere_tokenizer import CohereLocalTokenizer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.mark.models
|
|
13
|
+
def test_tokenize():
|
|
14
|
+
tokenizer = CohereLocalTokenizer(api_key=None, cache_config=BlackHoleCacheConfig())
|
|
15
|
+
request = TokenizationRequest(tokenizer="cohere/command", text="otter 🦦")
|
|
16
|
+
result = tokenizer.tokenize(request)
|
|
17
|
+
assert result.success
|
|
18
|
+
assert not result.cached
|
|
19
|
+
assert result.tokens == [TokenizationToken(token) for token in ["ot", "ter", " �", "�", "�"]]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.models
|
|
23
|
+
def test_encode():
|
|
24
|
+
tokenizer = CohereLocalTokenizer(api_key=None, cache_config=BlackHoleCacheConfig())
|
|
25
|
+
request = TokenizationRequest(tokenizer="cohere/command", text="otter 🦦", encode=True)
|
|
26
|
+
result = tokenizer.tokenize(request)
|
|
27
|
+
assert result.success
|
|
28
|
+
assert not result.cached
|
|
29
|
+
assert result.tokens == [TokenizationToken(token) for token in [1741, 1779, 7728, 107, 107]]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.mark.models
|
|
33
|
+
def test_decode():
|
|
34
|
+
tokenizer = CohereLocalTokenizer(api_key=None, cache_config=BlackHoleCacheConfig())
|
|
35
|
+
request = DecodeRequest(tokenizer="cohere/command", tokens=[1741, 1779, 7728, 107, 107])
|
|
36
|
+
result = tokenizer.decode(request)
|
|
37
|
+
assert result.success
|
|
38
|
+
assert not result.cached
|
|
39
|
+
assert result.text == "otter 🦦"
|
|
@@ -17,7 +17,11 @@ class TestHuggingFaceGPT2Tokenizer:
|
|
|
17
17
|
def setup_method(self, method):
|
|
18
18
|
cache_file = tempfile.NamedTemporaryFile(delete=False)
|
|
19
19
|
self.cache_path: str = cache_file.name
|
|
20
|
-
self.tokenizer = HuggingFaceTokenizer(
|
|
20
|
+
self.tokenizer = HuggingFaceTokenizer(
|
|
21
|
+
SqliteCacheConfig(self.cache_path),
|
|
22
|
+
tokenizer_name="huggingface/gpt2",
|
|
23
|
+
pretrained_model_name_or_path="openai-community/gpt2",
|
|
24
|
+
)
|
|
21
25
|
|
|
22
26
|
def teardown_method(self, method):
|
|
23
27
|
os.remove(self.cache_path)
|
|
@@ -1,156 +0,0 @@
|
|
|
1
|
-
.correct {
|
|
2
|
-
background-color: #dfffdf;
|
|
3
|
-
}
|
|
4
|
-
|
|
5
|
-
.wrong {
|
|
6
|
-
background-color: #ffdfdf;
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
.scenario-info {
|
|
10
|
-
margin-top: 30px;
|
|
11
|
-
margin-bottom: 30px;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
td {
|
|
15
|
-
padding-left: 15px;
|
|
16
|
-
padding-right: 15px;
|
|
17
|
-
padding-top: 5px;
|
|
18
|
-
padding-bottom: 5px;
|
|
19
|
-
word-wrap: break-word;
|
|
20
|
-
max-width: 900px;
|
|
21
|
-
vertical-align: top;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
.results-table {
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
.table-container {
|
|
28
|
-
margin-top: 30px;
|
|
29
|
-
margin-bottom: 30px;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
tr {
|
|
33
|
-
border: solid;
|
|
34
|
-
border-color: #f0f0f0;
|
|
35
|
-
border-width: 1px 0;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
.results-table thead tr {
|
|
39
|
-
background-color: #f9f9f9;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
.logprob {
|
|
43
|
-
font-size: 8pt;
|
|
44
|
-
font-style: italic;
|
|
45
|
-
color: gray;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
.list-header {
|
|
49
|
-
font-size: 24px;
|
|
50
|
-
font-weight: bold;
|
|
51
|
-
}
|
|
52
|
-
.list-item {
|
|
53
|
-
color: black;
|
|
54
|
-
font-size: 14px;
|
|
55
|
-
white-space: nowrap;
|
|
56
|
-
}
|
|
57
|
-
.list-item-todo {
|
|
58
|
-
color: lightgray;
|
|
59
|
-
}
|
|
60
|
-
.list-item:hover {
|
|
61
|
-
color: black;
|
|
62
|
-
text-decoration: none;
|
|
63
|
-
background-color: lightgray;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
.main-link {
|
|
67
|
-
color: white;
|
|
68
|
-
background-color: #53A0C0;
|
|
69
|
-
}
|
|
70
|
-
.main-link:hover {
|
|
71
|
-
color: lightgray;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
.access-open {
|
|
75
|
-
background-color: lightgreen;
|
|
76
|
-
width: 100px;
|
|
77
|
-
}
|
|
78
|
-
.access-limited {
|
|
79
|
-
background-color: yellow;
|
|
80
|
-
width: 100px;
|
|
81
|
-
}
|
|
82
|
-
.access-restricted {
|
|
83
|
-
background-color: orange;
|
|
84
|
-
width: 100px;
|
|
85
|
-
}
|
|
86
|
-
.access-closed {
|
|
87
|
-
background-color: lightgray;
|
|
88
|
-
width: 100px;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
.technical-details {
|
|
92
|
-
font-size: 10px;
|
|
93
|
-
font-style: italic;
|
|
94
|
-
color: gray;
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
.logo-container {
|
|
98
|
-
display: flex;
|
|
99
|
-
flex-flow: row wrap;
|
|
100
|
-
justify-content: space-between;
|
|
101
|
-
padding: 20px;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
.logo-item {
|
|
105
|
-
margin: auto;
|
|
106
|
-
padding: 10px;
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
.instance-input {
|
|
110
|
-
font-style: italic;
|
|
111
|
-
background-color: #f5f5f5;
|
|
112
|
-
margin-left: 20px;
|
|
113
|
-
white-space: pre-wrap;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
.instance-reference {
|
|
117
|
-
font-style: italic;
|
|
118
|
-
background-color: #f5f5f5;
|
|
119
|
-
white-space: pre-wrap;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
.taxonomy-table {
|
|
123
|
-
margin: 10px;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
thead .table-sort-column {
|
|
127
|
-
background-color: #ffe599;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
tbody .table-sort-column {
|
|
131
|
-
background-color: #fff2cc;
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
.prompt {
|
|
135
|
-
font-style: italic;
|
|
136
|
-
background-color: #f5f5f5;
|
|
137
|
-
white-space: pre-wrap;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
.plot {
|
|
141
|
-
margin: 15px;
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
.plot img {
|
|
145
|
-
margin: 10px;
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
.plot-caption {
|
|
149
|
-
color: #555;
|
|
150
|
-
font-style: italic;
|
|
151
|
-
margin: 5px;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
.prediction-text {
|
|
155
|
-
white-space: pre-wrap;
|
|
156
|
-
}
|