crfm-helm: crfm_helm-0.2.1-py3-none-any.whl → crfm_helm-0.2.2-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +10 -8
  2. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +50 -37
  3. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
  4. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +1 -0
  5. helm/benchmark/__init__.py +2 -0
  6. helm/benchmark/adaptation/adapter_spec.py +3 -0
  7. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
  8. helm/benchmark/contamination/__init__.py +0 -0
  9. helm/benchmark/metrics/classification_metrics.py +28 -23
  10. helm/benchmark/metrics/test_classification_metrics.py +44 -9
  11. helm/benchmark/presentation/create_plots.py +617 -0
  12. helm/benchmark/presentation/summarize.py +4 -2
  13. helm/benchmark/presentation/test_create_plots.py +32 -0
  14. helm/benchmark/run.py +23 -1
  15. helm/benchmark/run_expander.py +161 -47
  16. helm/benchmark/run_specs.py +84 -10
  17. helm/benchmark/runner.py +31 -3
  18. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  19. helm/benchmark/scenarios/imdb_listdir.json +50014 -0
  20. helm/benchmark/scenarios/lex_glue_scenario.py +58 -17
  21. helm/benchmark/scenarios/lextreme_scenario.py +37 -25
  22. helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
  23. helm/benchmark/scenarios/scenario.py +5 -0
  24. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  25. helm/benchmark/static/benchmarking.css +14 -0
  26. helm/benchmark/static/benchmarking.js +43 -0
  27. helm/benchmark/static/index.html +2 -0
  28. helm/benchmark/static/json-urls.js +4 -0
  29. helm/benchmark/static/plot-captions.js +16 -0
  30. helm/benchmark/static/schema.yaml +66 -8
  31. helm/benchmark/window_services/cohere_window_service.py +20 -0
  32. helm/benchmark/window_services/flan_t5_window_service.py +29 -0
  33. helm/benchmark/window_services/huggingface_window_service.py +39 -0
  34. helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
  35. helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
  36. helm/benchmark/window_services/window_service_factory.py +27 -6
  37. helm/common/general.py +12 -5
  38. helm/proxy/clients/aleph_alpha_client.py +47 -28
  39. helm/proxy/clients/auto_client.py +28 -24
  40. helm/proxy/clients/huggingface_client.py +30 -17
  41. helm/proxy/clients/huggingface_model_registry.py +111 -0
  42. helm/proxy/clients/huggingface_tokenizer.py +23 -7
  43. helm/proxy/clients/openai_client.py +60 -2
  44. helm/proxy/clients/test_huggingface_model_registry.py +57 -0
  45. helm/proxy/clients/together_client.py +17 -2
  46. helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
  47. helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
  48. helm/proxy/models.py +82 -2
  49. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
  50. {crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0
helm/proxy/models.py CHANGED
@@ -11,8 +11,14 @@ EMBEDDING_MODEL_TAG: str = "embedding"
11
11
  FULL_FUNCTIONALITY_TEXT_MODEL_TAG: str = "full_functionality_text"
12
12
  LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG: str = "limited_functionality_text"
13
13
 
14
+ # ChatML format
15
+ CHATML_MODEL_TAG: str = "chatml"
16
+
14
17
  # For OpenAI models with wider context windows
15
- WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
18
+ WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window" # 4000 tokens
19
+
20
+ # For AI21 Jurassic-2 models with wider context windows
21
+ AI21_WIDER_CONTEXT_WINDOW_TAG: str = "ai21_wider_context_window"
16
22
 
17
23
  # To fetch models that use these tokenizers
18
24
  GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
@@ -122,6 +128,31 @@ ALL_MODELS = [
122
128
  description="Jurassic-1 Large (7.5B parameters)",
123
129
  tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
124
130
  ),
131
+ # AI21 Jurassic-2 Models: https://www.ai21.com/blog/introducing-j2
132
+ Model(
133
+ group="jurassic",
134
+ creator_organization="AI21 Labs",
135
+ name="ai21/j2-jumbo",
136
+ display_name="Jurassic-2 Jumbo (178B)",
137
+ description="Jurassic-2 Jumbo (178B parameters)",
138
+ tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
139
+ ),
140
+ Model(
141
+ group="jurassic",
142
+ creator_organization="AI21 Labs",
143
+ name="ai21/j2-grande",
144
+ display_name="Jurassic-2 Grande (17B)",
145
+ description="Jurassic-2 Grande (17B parameters) with a few tweaks to the training process.",
146
+ tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
147
+ ),
148
+ Model(
149
+ group="jurassic",
150
+ creator_organization="AI21 Labs",
151
+ name="ai21/j2-large",
152
+ display_name="Jurassic-2 Large (7.5B)",
153
+ description="Jurassic-2 Large (7.5B parameters)",
154
+ tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
155
+ ),
125
156
  # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
126
157
  Model(
127
158
  group="luminous",
@@ -250,6 +281,24 @@ ALL_MODELS = [
250
281
  description="Cohere small v20220720 (410M parameters)",
251
282
  tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
252
283
  ),
284
+ Model(
285
+ group="cohere",
286
+ creator_organization="Cohere",
287
+ name="cohere/command-medium-beta",
288
+ display_name="Cohere Command beta (6.1B)",
289
+ description="Cohere Command beta (6.1B parameters) is fine-tuned from the medium model "
290
+ "to respond well with instruction-like prompts",
291
+ tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
292
+ ),
293
+ Model(
294
+ group="cohere",
295
+ creator_organization="Cohere",
296
+ name="cohere/command-xlarge-beta",
297
+ display_name="Cohere Command beta (52.4B)",
298
+ description="Cohere Command beta (52.4B parameters) is fine-tuned from the XL model "
299
+ "to respond well with instruction-like prompts",
300
+ tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
301
+ ),
253
302
  # EleutherAI
254
303
  Model(
255
304
  group="together",
@@ -323,6 +372,15 @@ ALL_MODELS = [
323
372
  # Does not support echo=True
324
373
  tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
325
374
  ),
375
+ Model(
376
+ group="together",
377
+ creator_organization="Google",
378
+ name="together/flan-t5-xxl",
379
+ display_name="Flan-T5 (11B)",
380
+ description="Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks.",
381
+ # Does not support echo=True
382
+ tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
383
+ ),
326
384
  Model(
327
385
  group="together",
328
386
  creator_organization="Google",
@@ -498,7 +556,21 @@ ALL_MODELS = [
498
556
  description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
499
557
  tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
500
558
  ),
501
- # ChatGPT - https://openai.com/blog/chatgpt
559
+ # ChatGPT: https://openai.com/blog/chatgpt
560
+ Model(
561
+ group="gpt3",
562
+ creator_organization="OpenAI",
563
+ name="openai/gpt-3.5-turbo-0301",
564
+ display_name="gpt-3.5-turbo-0301",
565
+ # https://platform.openai.com/docs/models/gpt-3-5
566
+ description="Sibling model of text-davinci-003 is optimized for chat but works well "
567
+ "for traditional completions tasks as well. Snapshot from 2023-03-01.",
568
+ # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
569
+ # sequence length is smaller at 4087 with one user input message and one assistant
570
+ # output message because ChatGPT uses special tokens for message roles and boundaries.
571
+ # We use a rounded-down sequence length of 4000 to account for these special tokens.
572
+ tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
573
+ ),
502
574
  Model(
503
575
  group="gpt3",
504
576
  creator_organization="OpenAI",
@@ -550,6 +622,14 @@ ALL_MODELS = [
550
622
  description="GPT-JT (6B parameters) is a fork of GPT-J",
551
623
  tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPTJ_TOKENIZER_TAG],
552
624
  ),
625
+ Model(
626
+ group="together",
627
+ creator_organization="Together",
628
+ name="together/gpt-neoxt-chat-base-20b",
629
+ display_name="GPT-NeoXT-Chat-Base (20B)",
630
+ description="GPT-NeoXT-Chat-Base (20B parameters) is a fork of GPT-NeoX",
631
+ tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG, GPTNEO_TOKENIZER_TAG],
632
+ ),
553
633
  # Tsinghua
554
634
  Model(
555
635
  group="together",