crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crfm-helm might be problematic.
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
- helm/benchmark/annotation/call_center_annotator.py +247 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +68 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +71 -0
- helm/benchmark/annotation/medication_qa_annotator.py +68 -0
- helm/benchmark/annotation/model_as_judge.py +45 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
- helm/benchmark/annotation/xstest_annotator.py +110 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/safety_metrics.py +57 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +31 -2
- helm/benchmark/run_expander.py +113 -10
- helm/benchmark/run_spec_factory.py +4 -0
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
- helm/benchmark/run_specs/experimental_run_specs.py +85 -0
- helm/benchmark/run_specs/finance_run_specs.py +110 -0
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +251 -57
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +189 -0
- helm/benchmark/static/schema_image2struct.yaml +588 -0
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_safety.yaml +247 -0
- helm/benchmark/static/schema_tables.yaml +317 -0
- helm/benchmark/static/schema_thai.yaml +244 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
- helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +50 -28
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +79 -19
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +11 -5
- helm/clients/palmyra_client.py +25 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +7 -9
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +129 -23
- helm/clients/vertexai_client.py +62 -18
- helm/clients/vision_language/huggingface_vlm_client.py +1 -0
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +99 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +25 -0
- helm/common/mongo_key_value_store.py +2 -1
- helm/common/request.py +16 -0
- helm/config/model_deployments.yaml +740 -363
- helm/config/model_metadata.yaml +824 -128
- helm/config/tokenizer_configs.yaml +207 -10
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/example_queries.py +14 -21
- helm/proxy/services/server_service.py +2 -3
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +29 -62
- helm/tokenizers/huggingface_tokenizer.py +35 -13
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/schema_image2structure.yaml +0 -304
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
- helm/benchmark/static_build/assets/index-878a1094.css +0 -1
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
helm/config/tokenizer_configs.yaml
CHANGED

@@ -17,11 +17,31 @@ tokenizer_configs:
     prefix_token: "<s>"
 
   # AI21
-  - name: ai21/
+  - name: ai21/j2-tokenizer
     tokenizer_spec:
-      class_name: "helm.tokenizers.ai21_tokenizer.
-    end_of_text_token: "
-    prefix_token: ""
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-instruct-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-1.5-mini-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
+  - name: ai21/jamba-1.5-large-tokenizer
+    tokenizer_spec:
+      class_name: "helm.tokenizers.ai21_tokenizer.AI21LocalTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|startoftext|>"
 
   # AlephAlpha
   - name: AlephAlpha/luminous-base

@@ -45,6 +65,24 @@ tokenizer_configs:
     end_of_text_token: ""
     prefix_token: ""
 
+  # Alibaba DAMO Academy
+
+  - name: damo/seallm-7b-v2
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: damo/seallm-7b-v2.5
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: SeaLLMs/SeaLLM-7B-v2.5
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
+
   # Anthropic
   - name: anthropic/claude
     tokenizer_spec:

@@ -69,7 +107,7 @@ tokenizer_configs:
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
     end_of_text_token: "</s>"
-    prefix_token: "
+    prefix_token: "<s>"
   - name: bigscience/T0pp
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"

@@ -77,11 +115,45 @@ tokenizer_configs:
     prefix_token: ""
 
   # Cohere
-  - name: cohere/
+  - name: cohere/command
     tokenizer_spec:
-      class_name: "helm.tokenizers.cohere_tokenizer.
-    end_of_text_token: ""
-    prefix_token: "
+      class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer"
+    end_of_text_token: "<EOS_TOKEN>"
+    prefix_token: "<BOS_TOKEN>"
+
+  - name: cohere/command-light
+    tokenizer_spec:
+      class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer"
+    end_of_text_token: "<EOS_TOKEN>"
+    prefix_token: "<BOS_TOKEN>"
+
+  - name: cohere/command-r
+    tokenizer_spec:
+      class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer"
+    end_of_text_token: "<EOS_TOKEN>"
+    prefix_token: "<BOS_TOKEN>"
+
+  - name: cohere/command-r-plus
+    tokenizer_spec:
+      class_name: "helm.tokenizers.cohere_tokenizer.CohereLocalTokenizer"
+    end_of_text_token: "<EOS_TOKEN>"
+    prefix_token: "<BOS_TOKEN>"
+
+  - name: cohere/c4ai-command-r-v01
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: CohereForAI/c4ai-command-r-v01
+    end_of_text_token: "<EOS_TOKEN>"
+    prefix_token: "<BOS_TOKEN>"
+
+  - name: cohere/c4ai-command-r-plus
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: CohereForAI/c4ai-command-r-plus
+    end_of_text_token: "<EOS_TOKEN>"
+    prefix_token: "<BOS_TOKEN>"
 
   # Databricks
   - name: databricks/dbrx-instruct

@@ -159,6 +231,11 @@ tokenizer_configs:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
     end_of_text_token: "<eos>"
     prefix_token: "<bos>"
+  - name: google/gemma-2-9b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<eos>"
+    prefix_token: "<bos>"
 
   # Hf-internal-testing
 

@@ -240,6 +317,14 @@ tokenizer_configs:
     prefix_token: "<|begin_of_text|>"
     end_of_text_token: "<|end_of_text|>"
 
+  - name: meta/llama-3.1-8b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
+    prefix_token: "<|begin_of_text|>"
+    end_of_text_token: "<|end_of_text|>"
+
   # 01-ai
   - name: 01-ai/Yi-6B
     tokenizer_spec:

@@ -247,6 +332,17 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: "<s>"
 
+  # AI Singapore
+  - name: aisingapore/sea-lion-7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+        use_fast: false
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
+
 
   # Allen Institute for AI
   # The allenai/olmo-7b requires Python 3.9 or newer.

@@ -259,6 +355,12 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  - name: allenai/OLMo-1.7-7B-hf
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: ""
+
 
   # Microsoft
   - name: microsoft/phi-2

@@ -267,6 +369,20 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"
 
+  - name: microsoft/phi-3-small-8k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        trust_remote_code: true
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  - name: microsoft/phi-3-medium-4k-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<s>"
+
   # Mistralai
   - name: mistralai/Mistral-7B-v0.1
     tokenizer_spec:

@@ -274,6 +390,36 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: "<s>"
 
+  - name: mistralai/Mistral-7B-Instruct-v0.1
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: mistralai/Mistral-7B-Instruct-v0.2
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: mistralai/Mistral-7B-Instruct-v0.3
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: mistralai/Mistral-Nemo-Base-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  - name: mistralai/Mistral-Large-Instruct-2407
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Neurips
   - name: neurips/local
     tokenizer_spec:

@@ -281,19 +427,43 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"
 
-  #
+  # NVIDIA
+  - name: nvidia/nemotron-4-340b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: Xenova/Nemotron-4-340B-Instruct-Tokenizer
+        revision: b7aa0de92cda9f9e722d58d6ca90f46ae17d4701
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
+  # OpenAI
   - name: openai/cl100k_base
     tokenizer_spec:
       class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"
     end_of_text_token: "<|endoftext|>"
     prefix_token: "<|endoftext|>"
 
+  - name: openai/o200k_base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.tiktoken_tokenizer.TiktokenTokenizer"
+    end_of_text_token: "<|endoftext|>"
+    prefix_token: "<|endoftext|>"
+
   - name: openai/clip-vit-large-patch14
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
     end_of_text_token: ""
     prefix_token: ""
 
+  # OpenThaiGPT
+  - name: openthaigpt/openthaigpt-1.0.0-7b-chat
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
+  # Qwen
   - name: qwen/qwen-7b
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"

@@ -311,6 +481,14 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  - name: qwen/qwen2-72b-instruct
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: Qwen/Qwen2-72B-Instruct
+    end_of_text_token: "<|im_end|>"
+    prefix_token: "<|im_start|>"
+
   - name: qwen/qwen-vl
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"

@@ -331,10 +509,22 @@ tokenizer_configs:
     end_of_text_token: "<|endoftext|>"
     prefix_token: ""
 
+  # SambaLingo
+  - name: sambanova/sambalingo-thai-base
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: sambanovasystems/SambaLingo-Thai-Base
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Snowflake
   - name: snowflake/snowflake-arctic-instruct
     tokenizer_spec:
       class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+      args:
+        pretrained_model_name_or_path: Snowflake/snowflake-arctic-instruct
+        trust_remote_code: true
     end_of_text_token: "<|im_end|>"
     prefix_token: "<|im_start|>"
 

@@ -352,6 +542,13 @@ tokenizer_configs:
     end_of_text_token: "</s>"
     prefix_token: ""
 
+  # Typhoon
+  - name: scb10x/typhoon-7b
+    tokenizer_spec:
+      class_name: "helm.tokenizers.huggingface_tokenizer.HuggingFaceTokenizer"
+    end_of_text_token: "</s>"
+    prefix_token: "<s>"
+
   # Writer
   - name: writer/gpt2
     tokenizer_spec:
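Every entry added above follows the same shape: a tokenizer name, a `tokenizer_spec` whose `class_name` is a fully qualified Python class, and optional `args` forwarded to that class's constructor. A minimal sketch of how such an entry can be resolved, assuming a generic loader (`instantiate_from_spec` and `extra_kwargs` are illustrative names, not HELM's actual API):

```python
import importlib
from typing import Any, Dict


def instantiate_from_spec(spec: Dict[str, Any], **extra_kwargs: Any) -> Any:
    """Resolve `class_name` ("pkg.module.Class") and construct the class.

    YAML `args` become constructor keyword arguments; `extra_kwargs` stands in
    for injected bindings such as `cache_config` (hypothetical here).
    """
    module_name, class_name = spec["class_name"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**spec.get("args", {}), **extra_kwargs)
```

This is why entries like `damo/seallm-7b-v2` need only data, not code: the `args` mapping is passed straight through to `HuggingFaceTokenizer`.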
helm/proxy/critique/model_critique_client.py
CHANGED

@@ -15,6 +15,7 @@ from helm.common.optional_dependencies import handle_module_not_found_error
 from helm.common.request import Request, RequestResult, GeneratedOutput
 from helm.clients.client import Client
 from helm.proxy.critique.critique_client import CritiqueClient
+from helm.common.media_object import MultimediaObject, MediaObject
 
 
 class CritiqueParseError(Exception):

@@ -24,6 +25,8 @@ class CritiqueParseError(Exception):
 class ModelCritiqueClient(CritiqueClient):
     """A CritiqueClient that queries a Model to answer CritiqueRequests."""
 
+    VISION_LANGUAGE_MODELS = ["openai/gpt-4-vision", "reka/reka", "huggingface/prometheus-vision"]
+
     def __init__(self, client: Client, model_name):
         self._client = client
         self._model_name = model_name

@@ -31,6 +34,11 @@ class ModelCritiqueClient(CritiqueClient):
             get_default_model_deployment_for_model(model_name, warn_arg_deprecated=False, ignore_deprecated=True)
             or self._model_name
         )
+        self.vision_language = False
+        for vision_language_model_name in self.VISION_LANGUAGE_MODELS:
+            if model_name.startswith(vision_language_model_name):
+                self.vision_language = True
+                break
 
     def _interpolate_fields(self, text: str, fields: Dict[str, str]) -> str:
         for key, value in fields.items():

@@ -58,10 +66,15 @@ class ModelCritiqueClient(CritiqueClient):
 
         requests: List[Request] = []
         for question in task.questions:
-            prompt: str
+            prompt: str
+            if len(question.text) > 0:
+                prompt = base_prompt + "\n\n" + self._question_to_prompt(question, fields)
+            else:
+                # We may don't want to add extra newlines and prompts
+                # if the question text is empty (e.g., the Vibe-Eval evaluator).
+                prompt = base_prompt
             if question.question_type == "free_response":
-
-                max_tokens = 100
+                max_tokens = 100 if task.max_tokens is None else task.max_tokens
             elif question.question_type == "checkbox":
                 # We multiply by 2 because the model will generate a comma after each option.
                 max_tokens = len(question.options) * 2

@@ -78,12 +91,21 @@ class ModelCritiqueClient(CritiqueClient):
 
                 prompt = anthropic.HUMAN_PROMPT + prompt + anthropic.AI_PROMPT
 
+            multimodal_prompt: Optional[MultimediaObject] = None
+            if self.vision_language:
+                assert question.media_object is not None, "Expect media_object for vision-language models"
+                image_media: MediaObject = question.media_object
+                text_media: MediaObject = MediaObject(text=prompt, content_type="text/plain")
+                multimodal_prompt = MultimediaObject(media_objects=[image_media, text_media])
+                prompt = ""  # set to empty string to avoid conflicts with multimodal_prompt
+
             request = Request(
                 model=self._model_name,
                 model_deployment=self._model_deployment_name,
                 prompt=prompt,
                 max_tokens=max_tokens,
                 echo_prompt=False,
+                multimodal_prompt=multimodal_prompt,
             )
             requests.append(request)
         return requests

@@ -124,7 +146,13 @@ class ModelCritiqueClient(CritiqueClient):
                 raise CritiqueParseError(
                     f"Invalid answer: {completion}. Multiple choice questions should have one answer."
                 )
-
+            letter_answer = answers[0]
+            choice_rank = string.ascii_uppercase.index(letter_answer)
+            if choice_rank >= len(question.options):
+                raise CritiqueParseError(
+                    f"Invalid answer: {completion}. The answer is out of range of the options: {question.options}"
+                )
+            return letter_answer
         except CritiqueParseError as e:
             # If there was an error parsing the answer, we assume the user did not answer the question.
             hlog(f"Error parsing answer: {e}. Skipping question (and so the respondent entirely)")
helm/proxy/example_queries.py
CHANGED

@@ -22,7 +22,6 @@ example_queries = [
            temperature: 0.5 # Medium amount of randomness
            stop_sequences: [.] # Stop when you hit a period
            model: openai/gpt-3.5-turbo-0613
-           model_deployment: openai/gpt-3.5-turbo-0613
            """
        ),
        environments="",

@@ -35,24 +34,24 @@
            stop_sequences: [\\n] # Stop when you hit a newline
            num_completions: 5 # Generate many samples
            model: openai/gpt-3.5-turbo-0613
-           model_deployment: openai/gpt-3.5-turbo-0613
            """
        ),
        environments="",
    ),
-   Query(
-       prompt="The quick brown fox jumps over the lazy dog.",
-       settings=dedent(
-           """
-           echo_prompt: true # Analyze the prompt
-           max_tokens: 0 # Don't generate any more
-           top_k_per_token: 5 # Show alternatives for each position
-           model: openai/text-davinci-002
-           model_deployment: openai/text-davinci-002
-           """
-       ),
-       environments=dedent(""),
-   ),
+   # Disabled because `max_tokens: 0` no longer works on the OpenAI API
+   # Query(
+   #     prompt="The quick brown fox jumps over the lazy dog.",
+   #     settings=dedent(
+   #         """
+   #         echo_prompt: true # Analyze the prompt
+   #         max_tokens: 0 # Don't generate any more
+   #         top_k_per_token: 5 # Show alternatives for each position
+   #         model: openai/text-davinci-002
+   #         model_deployment: openai/text-davinci-002
+   #         """
+   #     ),
+   #     environments=dedent(""),
+   # ),
    Query(
        prompt="Odd numbers: 1 -> 3 -> 5",
        settings=dedent(

@@ -60,7 +59,6 @@
            temperature: 0 # Deterministic
            max_tokens: 50
            model: openai/gpt-3.5-turbo-0613
-           model_deployment: openai/gpt-3.5-turbo-0613
            """
        ),
        environments="",

@@ -73,7 +71,6 @@
            stop_sequences: [.]
            # Try out multiple models
            model: ${model}
-           model_deployment: ${model}
            """
        ),
        environments=dedent(

@@ -100,7 +97,6 @@
            num_completions: 5
            # Try out multiple models
            model: ${model}
-           model_deployment: ${model}
            """
        ),
        environments=dedent(

@@ -136,7 +132,6 @@
            top_k_per_token: 4
            # Try out multiple models
            model: ${model}
-           model_deployment: ${model}
            """
        ),
        environments=dedent(

@@ -150,7 +145,6 @@
        settings=dedent(
            """
            model: openai/gpt-3.5-turbo-0613
-           model_deployment: openai/gpt-3.5-turbo-0613
            """
        ),
        environments="",

@@ -163,7 +157,6 @@
            stop_sequences: [\\n]
            # Try out multiple models
            model: ${model}
-           model_deployment: ${model}
            """
        ),
        environments=dedent(
helm/proxy/services/server_service.py
CHANGED

@@ -25,7 +25,6 @@ from helm.common.hierarchical_logger import hlog
 from helm.proxy.accounts import Accounts, Account
 from helm.clients.auto_client import AutoClient
 from helm.clients.moderation_api_client import ModerationAPIClient
-from helm.clients.perspective_api_client import PerspectiveAPIClient
 from helm.clients.image_generation.nudity_check_client import NudityCheckClient
 from helm.clients.gcs_client import GCSClient
 from helm.clients.clip_score_client import CLIPScoreClient

@@ -75,7 +74,7 @@ class ServerService(Service):
         # Lazily instantiate the following clients
         self.moderation_api_client: Optional[ModerationAPIClient] = None
         self.toxicity_classifier_client: Optional[ToxicityClassifierClient] = None
-        self.perspective_api_client: Optional[
+        self.perspective_api_client: Optional[ToxicityClassifierClient] = None
         self.nudity_check_client: Optional[NudityCheckClient] = None
         self.clip_score_client: Optional[CLIPScoreClient] = None
         self.gcs_client: Optional[GCSClient] = None

@@ -119,7 +118,7 @@ class ServerService(Service):
             return "codex"
         elif model_deployment.startswith("openai/dall-e-"):
             return "dall_e"
-        elif model_deployment.startswith("openai/gpt-4
+        elif model_deployment.startswith("openai/gpt-4"):
             return "gpt4"
         else:
             return "gpt3"
helm/proxy/token_counters/test_auto_token_counter.py
CHANGED

@@ -13,8 +13,8 @@ class TestAutoTokenCounter:
         )
         # The following prompt has 51 tokens according to the GPT-2 tokenizer
         request = Request(
-            model="openai/
-            model_deployment="
+            model="openai/gpt2",
+            model_deployment="huggingface/gpt2",
             prompt="The Center for Research on Foundation Models (CRFM) is "
             "an interdisciplinary initiative born out of the Stanford "
             "Institute for Human-Centered Artificial Intelligence (HAI) "
helm/tokenizers/ai21_tokenizer.py
CHANGED

@@ -1,60 +1,52 @@
-
-import
-
-from
-
-from helm.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        self.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # If 'tokens' is not present in the response, assume request failed.
-            if "tokens" not in response:
-                handle_failed_request(api_type="tokenizer", response=response)
-
-            return response
-
-        try:
-            response, cached = self.cache.get(raw_request, do_it)
-        except AI21RequestError:
-            return TokenizationRequestResult(success=False, cached=False, text="", tokens=[])
-
-        # Each token is represented like this in the response:
-        # {'token': '▁Hello', 'textRange': {'start': 0, 'end': 5}}
-        tokens: List[TokenizationToken] = []
-        for token_dict in response["tokens"]:
-            tokens.append(
-                TokenizationToken(value=token_dict["token"], text_range=from_dict(TextRange, token_dict["textRange"]))
+import threading
+from typing import Any, Dict
+
+from helm.common.cache import CacheConfig
+from helm.common.optional_dependencies import handle_module_not_found_error
+from helm.tokenizers.caching_tokenizer import CachingTokenizer
+
+try:
+    from ai21_tokenizer import Tokenizer as SDKTokenizer
+    from ai21_tokenizer.base_tokenizer import BaseTokenizer
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["ai21"])
+
+
+class AI21LocalTokenizer(CachingTokenizer):
+    """AI21 tokenizer using the AI21 Python library."""
+
+    def __init__(self, cache_config: CacheConfig) -> None:
+        super().__init__(cache_config)
+        self._tokenizers_lock = threading.Lock()
+        self.tokenizers: Dict[str, BaseTokenizer] = {}
+
+    def _get_tokenizer(self, tokenizer_name: str) -> BaseTokenizer:
+        with self._tokenizers_lock:
+            if tokenizer_name not in self.tokenizers:
+                self.tokenizers[tokenizer_name] = SDKTokenizer.get_tokenizer(tokenizer_name)
+        return self.tokenizers[tokenizer_name]
+
+    def _tokenize_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        tokenizer_name = request["tokenizer"].split("/")[1]
+        tokenizer = self._get_tokenizer(tokenizer_name)
+        if request["truncation"]:
+            token_ids = tokenizer.encode(
+                text=request["text"],
+                truncation=request["truncation"],
+                max_length=request["max_length"],
+                add_special_tokens=False,
             )
-
-
-
-
-
+        else:
+            token_ids = tokenizer.encode(
+                text=request["text"],
+                add_special_tokens=False,
+            )
+        if request["encode"]:
+            return {"tokens": token_ids}
+        else:
+            return {"tokens": tokenizer.convert_ids_to_tokens(token_ids)}
+
+    def _decode_do_it(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        tokenizer_name = request["tokenizer"].split("/")[1]
+        tokenizer = self._get_tokenizer(tokenizer_name)
+        return {"text": tokenizer.decode(request["tokens"])}
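The rewritten `AI21LocalTokenizer` constructs one SDK tokenizer per name, lazily and behind a lock, so concurrent requests never build the same tokenizer twice. The same pattern in isolation (a generic sketch; `LazyRegistry` is not a HELM class):

```python
import threading
from typing import Callable, Dict, Generic, TypeVar

T = TypeVar("T")


class LazyRegistry(Generic[T]):
    """Thread-safe, lazily populated cache keyed by name."""

    def __init__(self, factory: Callable[[str], T]) -> None:
        self._factory = factory
        self._lock = threading.Lock()
        self._items: Dict[str, T] = {}

    def get(self, name: str) -> T:
        # Hold the lock across both the membership check and construction so
        # that only one thread ever builds the entry for a given name.
        with self._lock:
            if name not in self._items:
                self._items[name] = self._factory(name)
            return self._items[name]
```

For instance, `LazyRegistry(SDKTokenizer.get_tokenizer)` would reproduce the caching behavior of `_get_tokenizer` above.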
helm/tokenizers/auto_tokenizer.py
CHANGED

@@ -41,7 +41,7 @@ class AutoTokenizer(Tokenizer):
         if tokenizer_config:
             tokenizer_spec = inject_object_spec_args(
                 tokenizer_config.tokenizer_spec,
-                constant_bindings={"cache_config": cache_config},
+                constant_bindings={"cache_config": cache_config, "tokenizer_name": tokenizer_name},
                 provider_bindings={
                     "api_key": lambda: provide_api_key(self.credentials, organization),
                     "project_id": lambda: self.credentials.get(organization + "ProjectId", None),  # VertexAI
|