PyPI - crfm-helm - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

crfm-helm 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (50)

{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +10 -8
{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +50 -37
{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +1 -0
helm/benchmark/__init__.py +2 -0
helm/benchmark/adaptation/adapter_spec.py +3 -0
helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
helm/benchmark/contamination/__init__.py +0 -0
helm/benchmark/metrics/classification_metrics.py +28 -23
helm/benchmark/metrics/test_classification_metrics.py +44 -9
helm/benchmark/presentation/create_plots.py +617 -0
helm/benchmark/presentation/summarize.py +4 -2
helm/benchmark/presentation/test_create_plots.py +32 -0
helm/benchmark/run.py +23 -1
helm/benchmark/run_expander.py +161 -47
helm/benchmark/run_specs.py +84 -10
helm/benchmark/runner.py +31 -3
helm/benchmark/scenarios/copyright_scenario.py +1 -1
helm/benchmark/scenarios/imdb_listdir.json +50014 -0
helm/benchmark/scenarios/lex_glue_scenario.py +58 -17
helm/benchmark/scenarios/lextreme_scenario.py +37 -25
helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
helm/benchmark/scenarios/scenario.py +5 -0
helm/benchmark/scenarios/the_pile_scenario.py +1 -1
helm/benchmark/static/benchmarking.css +14 -0
helm/benchmark/static/benchmarking.js +43 -0
helm/benchmark/static/index.html +2 -0
helm/benchmark/static/json-urls.js +4 -0
helm/benchmark/static/plot-captions.js +16 -0
helm/benchmark/static/schema.yaml +66 -8
helm/benchmark/window_services/cohere_window_service.py +20 -0
helm/benchmark/window_services/flan_t5_window_service.py +29 -0
helm/benchmark/window_services/huggingface_window_service.py +39 -0
helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
helm/benchmark/window_services/window_service_factory.py +27 -6
helm/common/general.py +12 -5
helm/proxy/clients/aleph_alpha_client.py +47 -28
helm/proxy/clients/auto_client.py +28 -24
helm/proxy/clients/huggingface_client.py +30 -17
helm/proxy/clients/huggingface_model_registry.py +111 -0
helm/proxy/clients/huggingface_tokenizer.py +23 -7
helm/proxy/clients/openai_client.py +60 -2
helm/proxy/clients/test_huggingface_model_registry.py +57 -0
helm/proxy/clients/together_client.py +17 -2
helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
helm/proxy/models.py +82 -2
{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0

helm/proxy/models.py CHANGED Viewed

@@ -11,8 +11,14 @@ EMBEDDING_MODEL_TAG: str = "embedding"
 FULL_FUNCTIONALITY_TEXT_MODEL_TAG: str = "full_functionality_text"
 LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG: str = "limited_functionality_text"
+# ChatML format
+CHATML_MODEL_TAG: str = "chatml"
 # For OpenAI models with wider context windows
-WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
+WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"  # 4000 tokens
+# For AI21 Jurassic-2 models with wider context windows
+AI21_WIDER_CONTEXT_WINDOW_TAG: str = "ai21_wider_context_window"
 # To fetch models that use these tokenizers
 GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
@@ -122,6 +128,31 @@ ALL_MODELS = [
         description="Jurassic-1 Large (7.5B parameters)",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
     ),
+    # AI21 Jurassic-2 Models: https://www.ai21.com/blog/introducing-j2
+    Model(
+        group="jurassic",
+        creator_organization="AI21 Labs",
+        name="ai21/j2-jumbo",
+        display_name="Jurassic-2 Jumbo (178B)",
+        description="Jurassic-2 Jumbo (178B parameters)",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
+    ),
+    Model(
+        group="jurassic",
+        creator_organization="AI21 Labs",
+        name="ai21/j2-grande",
+        display_name="Jurassic-2 Grande (17B)",
+        description="Jurassic-2 Grande (17B parameters) with a few tweaks to the training process.",
+        tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
+    ),
+    Model(
+        group="jurassic",
+        creator_organization="AI21 Labs",
+        name="ai21/j2-large",
+        display_name="Jurassic-2 Large (7.5B)",
+        description="Jurassic-2 Large (7.5B parameters)",
+        tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
+    ),
     # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
     Model(
         group="luminous",
@@ -250,6 +281,24 @@ ALL_MODELS = [
         description="Cohere small v20220720 (410M parameters)",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
     ),
+    Model(
+        group="cohere",
+        creator_organization="Cohere",
+        name="cohere/command-medium-beta",
+        display_name="Cohere Command beta (6.1B)",
+        description="Cohere Command beta (6.1B parameters) is fine-tuned from the medium model "
+        "to respond well with instruction-like prompts",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
+    ),
+    Model(
+        group="cohere",
+        creator_organization="Cohere",
+        name="cohere/command-xlarge-beta",
+        display_name="Cohere Command beta (52.4B)",
+        description="Cohere Command beta (52.4B parameters) is fine-tuned from the XL model "
+        "to respond well with instruction-like prompts",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
+    ),
     # EleutherAI
     Model(
         group="together",
@@ -323,6 +372,15 @@ ALL_MODELS = [
         # Does not support echo=True
         tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
     ),
+    Model(
+        group="together",
+        creator_organization="Google",
+        name="together/flan-t5-xxl",
+        display_name="Flan-T5 (11B)",
+        description="Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks.",
+        # Does not support echo=True
+        tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
+    ),
     Model(
         group="together",
         creator_organization="Google",
@@ -498,7 +556,21 @@ ALL_MODELS = [
         description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
         tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
     ),
-    # ChatGPT - https://openai.com/blog/chatgpt
+    # ChatGPT: https://openai.com/blog/chatgpt
+    Model(
+        group="gpt3",
+        creator_organization="OpenAI",
+        name="openai/gpt-3.5-turbo-0301",
+        display_name="gpt-3.5-turbo-0301",
+        # https://platform.openai.com/docs/models/gpt-3-5
+        description="Sibling model of text-davinci-003 is optimized for chat but works well "
+        "for traditional completions tasks as well. Snapshot from 2023-03-01.",
+        # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+        # sequence length is smaller at 4087 with one user input message and one assistant
+        # output message because ChatGPT uses special tokens for message roles and boundaries.
+        # We use a rounded-down sequence length of 4000 to account for these special tokens.
+        tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
+    ),
     Model(
         group="gpt3",
         creator_organization="OpenAI",
@@ -550,6 +622,14 @@ ALL_MODELS = [
         description="GPT-JT (6B parameters) is a fork of GPT-J",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPTJ_TOKENIZER_TAG],
     ),
+    Model(
+        group="together",
+        creator_organization="Together",
+        name="together/gpt-neoxt-chat-base-20b",
+        display_name="GPT-NeoXT-Chat-Base (20B)",
+        description="GPT-NeoXT-Chat-Base (20B parameters) is a fork of GPT-NeoX",
+        tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG, GPTNEO_TOKENIZER_TAG],
+    ),
     # Tsinghua
     Model(
         group="together",

{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{crfm_helm-0.2.1.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

crfm-helm 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

crfm-helm 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl