crfm-helm 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +10 -8
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +50 -37
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +1 -0
- helm/benchmark/__init__.py +2 -0
- helm/benchmark/adaptation/adapter_spec.py +3 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
- helm/benchmark/contamination/__init__.py +0 -0
- helm/benchmark/metrics/classification_metrics.py +28 -23
- helm/benchmark/metrics/test_classification_metrics.py +44 -9
- helm/benchmark/presentation/create_plots.py +617 -0
- helm/benchmark/presentation/summarize.py +4 -2
- helm/benchmark/presentation/test_create_plots.py +32 -0
- helm/benchmark/run.py +23 -1
- helm/benchmark/run_expander.py +161 -47
- helm/benchmark/run_specs.py +84 -10
- helm/benchmark/runner.py +31 -3
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/imdb_listdir.json +50014 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +58 -17
- helm/benchmark/scenarios/lextreme_scenario.py +37 -25
- helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
- helm/benchmark/scenarios/scenario.py +5 -0
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/static/benchmarking.css +14 -0
- helm/benchmark/static/benchmarking.js +43 -0
- helm/benchmark/static/index.html +2 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/plot-captions.js +16 -0
- helm/benchmark/static/schema.yaml +66 -8
- helm/benchmark/window_services/cohere_window_service.py +20 -0
- helm/benchmark/window_services/flan_t5_window_service.py +29 -0
- helm/benchmark/window_services/huggingface_window_service.py +39 -0
- helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
- helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
- helm/benchmark/window_services/window_service_factory.py +27 -6
- helm/common/general.py +12 -5
- helm/proxy/clients/aleph_alpha_client.py +47 -28
- helm/proxy/clients/auto_client.py +28 -24
- helm/proxy/clients/huggingface_client.py +30 -17
- helm/proxy/clients/huggingface_model_registry.py +111 -0
- helm/proxy/clients/huggingface_tokenizer.py +23 -7
- helm/proxy/clients/openai_client.py +60 -2
- helm/proxy/clients/test_huggingface_model_registry.py +57 -0
- helm/proxy/clients/together_client.py +17 -2
- helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
- helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
- helm/proxy/models.py +82 -2
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0
helm/proxy/models.py
CHANGED
|
@@ -11,8 +11,14 @@ EMBEDDING_MODEL_TAG: str = "embedding"
|
|
|
11
11
|
FULL_FUNCTIONALITY_TEXT_MODEL_TAG: str = "full_functionality_text"
|
|
12
12
|
LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG: str = "limited_functionality_text"
|
|
13
13
|
|
|
14
|
+
# ChatML format
|
|
15
|
+
CHATML_MODEL_TAG: str = "chatml"
|
|
16
|
+
|
|
14
17
|
# For OpenAI models with wider context windows
|
|
15
|
-
WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
|
|
18
|
+
WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window" # 4000 tokens
|
|
19
|
+
|
|
20
|
+
# For AI21 Jurassic-2 models with wider context windows
|
|
21
|
+
AI21_WIDER_CONTEXT_WINDOW_TAG: str = "ai21_wider_context_window"
|
|
16
22
|
|
|
17
23
|
# To fetch models that use these tokenizers
|
|
18
24
|
GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
|
|
@@ -122,6 +128,31 @@ ALL_MODELS = [
|
|
|
122
128
|
description="Jurassic-1 Large (7.5B parameters)",
|
|
123
129
|
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
|
|
124
130
|
),
|
|
131
|
+
# AI21 Jurassic-2 Models: https://www.ai21.com/blog/introducing-j2
|
|
132
|
+
Model(
|
|
133
|
+
group="jurassic",
|
|
134
|
+
creator_organization="AI21 Labs",
|
|
135
|
+
name="ai21/j2-jumbo",
|
|
136
|
+
display_name="Jurassic-2 Jumbo (178B)",
|
|
137
|
+
description="Jurassic-2 Jumbo (178B parameters)",
|
|
138
|
+
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
|
|
139
|
+
),
|
|
140
|
+
Model(
|
|
141
|
+
group="jurassic",
|
|
142
|
+
creator_organization="AI21 Labs",
|
|
143
|
+
name="ai21/j2-grande",
|
|
144
|
+
display_name="Jurassic-2 Grande (17B)",
|
|
145
|
+
description="Jurassic-2 Grande (17B parameters) with a few tweaks to the training process.",
|
|
146
|
+
tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
|
|
147
|
+
),
|
|
148
|
+
Model(
|
|
149
|
+
group="jurassic",
|
|
150
|
+
creator_organization="AI21 Labs",
|
|
151
|
+
name="ai21/j2-large",
|
|
152
|
+
display_name="Jurassic-2 Large (7.5B)",
|
|
153
|
+
description="Jurassic-2 Large (7.5B parameters)",
|
|
154
|
+
tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
|
|
155
|
+
),
|
|
125
156
|
# Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
|
|
126
157
|
Model(
|
|
127
158
|
group="luminous",
|
|
@@ -250,6 +281,24 @@ ALL_MODELS = [
|
|
|
250
281
|
description="Cohere small v20220720 (410M parameters)",
|
|
251
282
|
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
|
|
252
283
|
),
|
|
284
|
+
Model(
|
|
285
|
+
group="cohere",
|
|
286
|
+
creator_organization="Cohere",
|
|
287
|
+
name="cohere/command-medium-beta",
|
|
288
|
+
display_name="Cohere Command beta (6.1B)",
|
|
289
|
+
description="Cohere Command beta (6.1B parameters) is fine-tuned from the medium model "
|
|
290
|
+
"to respond well with instruction-like prompts",
|
|
291
|
+
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
|
|
292
|
+
),
|
|
293
|
+
Model(
|
|
294
|
+
group="cohere",
|
|
295
|
+
creator_organization="Cohere",
|
|
296
|
+
name="cohere/command-xlarge-beta",
|
|
297
|
+
display_name="Cohere Command beta (52.4B)",
|
|
298
|
+
description="Cohere Command beta (52.4B parameters) is fine-tuned from the XL model "
|
|
299
|
+
"to respond well with instruction-like prompts",
|
|
300
|
+
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
|
|
301
|
+
),
|
|
253
302
|
# EleutherAI
|
|
254
303
|
Model(
|
|
255
304
|
group="together",
|
|
@@ -323,6 +372,15 @@ ALL_MODELS = [
|
|
|
323
372
|
# Does not support echo=True
|
|
324
373
|
tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
|
|
325
374
|
),
|
|
375
|
+
Model(
|
|
376
|
+
group="together",
|
|
377
|
+
creator_organization="Google",
|
|
378
|
+
name="together/flan-t5-xxl",
|
|
379
|
+
display_name="Flan-T5 (11B)",
|
|
380
|
+
description="Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks.",
|
|
381
|
+
# Does not support echo=True
|
|
382
|
+
tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
|
|
383
|
+
),
|
|
326
384
|
Model(
|
|
327
385
|
group="together",
|
|
328
386
|
creator_organization="Google",
|
|
@@ -498,7 +556,21 @@ ALL_MODELS = [
|
|
|
498
556
|
description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
|
|
499
557
|
tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
|
|
500
558
|
),
|
|
501
|
-
# ChatGPT
|
|
559
|
+
# ChatGPT: https://openai.com/blog/chatgpt
|
|
560
|
+
Model(
|
|
561
|
+
group="gpt3",
|
|
562
|
+
creator_organization="OpenAI",
|
|
563
|
+
name="openai/gpt-3.5-turbo-0301",
|
|
564
|
+
display_name="gpt-3.5-turbo-0301",
|
|
565
|
+
# https://platform.openai.com/docs/models/gpt-3-5
|
|
566
|
+
description="Sibling model of text-davinci-003 is optimized for chat but works well "
|
|
567
|
+
"for traditional completions tasks as well. Snapshot from 2023-03-01.",
|
|
568
|
+
# The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
|
|
569
|
+
# sequence length is smaller at 4087 with one user input message and one assistant
|
|
570
|
+
# output message because ChatGPT uses special tokens for message roles and boundaries.
|
|
571
|
+
# We use a rounded-down sequence length of 4000 to account for these special tokens.
|
|
572
|
+
tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
|
|
573
|
+
),
|
|
502
574
|
Model(
|
|
503
575
|
group="gpt3",
|
|
504
576
|
creator_organization="OpenAI",
|
|
@@ -550,6 +622,14 @@ ALL_MODELS = [
|
|
|
550
622
|
description="GPT-JT (6B parameters) is a fork of GPT-J",
|
|
551
623
|
tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPTJ_TOKENIZER_TAG],
|
|
552
624
|
),
|
|
625
|
+
Model(
|
|
626
|
+
group="together",
|
|
627
|
+
creator_organization="Together",
|
|
628
|
+
name="together/gpt-neoxt-chat-base-20b",
|
|
629
|
+
display_name="GPT-NeoXT-Chat-Base (20B)",
|
|
630
|
+
description="GPT-NeoXT-Chat-Base (20B parameters) is a fork of GPT-NeoX",
|
|
631
|
+
tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG, GPTNEO_TOKENIZER_TAG],
|
|
632
|
+
),
|
|
553
633
|
# Tsinghua
|
|
554
634
|
Model(
|
|
555
635
|
group="together",
|
|
File without changes
|
|
File without changes
|