PyPI - crfm-helm - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl - Mend

crfm-helm 0.5.1py3-none-any.whl → 0.5.3py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (236) hide show

{crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +41 -57
{crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +197 -152
{crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
helm/benchmark/adaptation/adapter_spec.py +32 -31
helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
helm/benchmark/adaptation/common_adapter_specs.py +2 -0
helm/benchmark/annotation/air_bench_annotator.py +64 -0
helm/benchmark/annotation/annotator_factory.py +6 -0
helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
helm/benchmark/annotation/call_center_annotator.py +247 -0
helm/benchmark/annotation/financebench_annotator.py +79 -0
helm/benchmark/annotation/harm_bench_annotator.py +68 -0
helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
helm/benchmark/annotation/live_qa_annotator.py +71 -0
helm/benchmark/annotation/medication_qa_annotator.py +68 -0
helm/benchmark/annotation/model_as_judge.py +45 -0
helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
helm/benchmark/annotation/xstest_annotator.py +110 -0
helm/benchmark/augmentations/translate_perturbation.py +1 -0
helm/benchmark/huggingface_registration.py +16 -6
helm/benchmark/metrics/air_bench_metrics.py +56 -0
helm/benchmark/metrics/annotation_metrics.py +108 -0
helm/benchmark/metrics/bhasa_metrics.py +188 -0
helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
helm/benchmark/metrics/code_metrics_helper.py +11 -1
helm/benchmark/metrics/fin_qa_metrics.py +60 -0
helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
helm/benchmark/metrics/live_qa_metrics.py +23 -0
helm/benchmark/metrics/medication_qa_metrics.py +23 -0
helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
helm/benchmark/metrics/safety_metrics.py +57 -0
helm/benchmark/metrics/summac/model_summac.py +3 -3
helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
helm/benchmark/metrics/unitxt_metrics.py +20 -10
helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
helm/benchmark/metrics/vision_language/image_metrics.py +30 -72
helm/benchmark/metrics/vision_language/image_utils.py +1 -1
helm/benchmark/model_metadata_registry.py +3 -3
helm/benchmark/presentation/schema.py +54 -4
helm/benchmark/presentation/test_run_entry.py +1 -0
helm/benchmark/presentation/test_schema.py +11 -0
helm/benchmark/run.py +31 -2
helm/benchmark/run_expander.py +113 -10
helm/benchmark/run_spec_factory.py +4 -0
helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
helm/benchmark/run_specs/call_center_run_specs.py +152 -0
helm/benchmark/run_specs/classic_run_specs.py +15 -11
helm/benchmark/run_specs/decodingtrust_run_specs.py +11 -9
helm/benchmark/run_specs/experimental_run_specs.py +85 -0
helm/benchmark/run_specs/finance_run_specs.py +110 -0
helm/benchmark/run_specs/safety_run_specs.py +154 -0
helm/benchmark/run_specs/vlm_run_specs.py +251 -57
helm/benchmark/scenarios/air_bench_scenario.py +50 -0
helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
helm/benchmark/scenarios/banking77_scenario.py +51 -0
helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
helm/benchmark/scenarios/call_center_scenario.py +84 -0
helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
helm/benchmark/scenarios/ewok_scenario.py +116 -0
helm/benchmark/scenarios/fin_qa_scenario.py +119 -0
helm/benchmark/scenarios/financebench_scenario.py +53 -0
helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
helm/benchmark/scenarios/scenario.py +1 -1
helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
helm/benchmark/scenarios/test_math_scenario.py +2 -8
helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/bingo_scenario.py +5 -5
helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +13 -2
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -7
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -5
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +44 -13
helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/pairs_scenario.py +7 -6
helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
helm/benchmark/scenarios/vision_language/unicorn_scenario.py +5 -5
helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +98 -0
helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
helm/benchmark/scenarios/xstest_scenario.py +35 -0
helm/benchmark/server.py +1 -6
helm/benchmark/static/schema_air_bench.yaml +3149 -0
helm/benchmark/static/schema_bhasa.yaml +709 -0
helm/benchmark/static/schema_call_center.yaml +232 -0
helm/benchmark/static/schema_classic.yaml +3 -59
helm/benchmark/static/schema_cleva.yaml +768 -0
helm/benchmark/static/schema_decodingtrust.yaml +444 -0
helm/benchmark/static/schema_ewok.yaml +367 -0
helm/benchmark/static/schema_finance.yaml +189 -0
helm/benchmark/static/schema_image2struct.yaml +588 -0
helm/benchmark/static/schema_instruction_following.yaml +3 -52
helm/benchmark/static/schema_lite.yaml +3 -61
helm/benchmark/static/schema_medical.yaml +255 -0
helm/benchmark/static/schema_mmlu.yaml +3 -61
helm/benchmark/static/schema_safety.yaml +247 -0
helm/benchmark/static/schema_tables.yaml +317 -0
helm/benchmark/static/schema_thai.yaml +244 -0
helm/benchmark/static/schema_unitxt.yaml +3 -61
helm/benchmark/static/{schema_vlm.yaml → schema_vhelm.yaml} +304 -298
helm/benchmark/static/schema_vhelm_lite.yaml +4 -59
helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
helm/benchmark/static_build/index.html +2 -2
helm/benchmark/window_services/test_openai_window_service.py +8 -8
helm/clients/ai21_client.py +71 -1
helm/clients/anthropic_client.py +50 -28
helm/clients/auto_client.py +11 -0
helm/clients/client.py +24 -7
helm/clients/cohere_client.py +98 -3
helm/clients/huggingface_client.py +79 -19
helm/clients/nvidia_nim_client.py +35 -0
helm/clients/openai_client.py +11 -5
helm/clients/palmyra_client.py +25 -0
helm/clients/perspective_api_client.py +11 -6
helm/clients/reka_client.py +189 -0
helm/clients/test_client.py +7 -9
helm/clients/test_huggingface_client.py +19 -3
helm/clients/test_together_client.py +72 -2
helm/clients/together_client.py +129 -23
helm/clients/vertexai_client.py +62 -18
helm/clients/vision_language/huggingface_vlm_client.py +1 -0
helm/clients/vision_language/open_flamingo_client.py +1 -2
helm/clients/vision_language/paligemma_client.py +146 -0
helm/clients/vision_language/palmyra_vision_client.py +99 -0
helm/clients/yi_client.py +31 -0
helm/common/critique_request.py +10 -1
helm/common/images_utils.py +25 -0
helm/common/mongo_key_value_store.py +2 -1
helm/common/request.py +16 -0
helm/config/model_deployments.yaml +740 -363
helm/config/model_metadata.yaml +824 -128
helm/config/tokenizer_configs.yaml +207 -10
helm/proxy/critique/model_critique_client.py +32 -4
helm/proxy/example_queries.py +14 -21
helm/proxy/services/server_service.py +2 -3
helm/proxy/token_counters/test_auto_token_counter.py +2 -2
helm/tokenizers/ai21_tokenizer.py +51 -59
helm/tokenizers/auto_tokenizer.py +1 -1
helm/tokenizers/cohere_tokenizer.py +29 -62
helm/tokenizers/huggingface_tokenizer.py +35 -13
helm/tokenizers/test_ai21_tokenizer.py +48 -0
helm/tokenizers/test_cohere_tokenizer.py +39 -0
helm/tokenizers/test_huggingface_tokenizer.py +5 -1
helm/benchmark/static/benchmarking.css +0 -156
helm/benchmark/static/benchmarking.js +0 -1705
helm/benchmark/static/config.js +0 -3
helm/benchmark/static/general.js +0 -122
helm/benchmark/static/images/crfm-logo.png +0 -0
helm/benchmark/static/images/helm-logo-simple.png +0 -0
helm/benchmark/static/images/helm-logo.png +0 -0
helm/benchmark/static/images/language-model-helm.png +0 -0
helm/benchmark/static/images/organizations/ai21.png +0 -0
helm/benchmark/static/images/organizations/anthropic.png +0 -0
helm/benchmark/static/images/organizations/bigscience.png +0 -0
helm/benchmark/static/images/organizations/cohere.png +0 -0
helm/benchmark/static/images/organizations/eleutherai.png +0 -0
helm/benchmark/static/images/organizations/google.png +0 -0
helm/benchmark/static/images/organizations/meta.png +0 -0
helm/benchmark/static/images/organizations/microsoft.png +0 -0
helm/benchmark/static/images/organizations/nvidia.png +0 -0
helm/benchmark/static/images/organizations/openai.png +0 -0
helm/benchmark/static/images/organizations/together.png +0 -0
helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
helm/benchmark/static/images/organizations/yandex.png +0 -0
helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
helm/benchmark/static/index.html +0 -68
helm/benchmark/static/info-icon.png +0 -0
helm/benchmark/static/json-urls.js +0 -69
helm/benchmark/static/plot-captions.js +0 -27
helm/benchmark/static/schema_image2structure.yaml +0 -304
helm/benchmark/static/utils.js +0 -285
helm/benchmark/static_build/assets/index-737eef9e.js +0 -10
helm/benchmark/static_build/assets/index-878a1094.css +0 -1
helm/benchmark/window_services/ai21_window_service.py +0 -247
helm/benchmark/window_services/cohere_window_service.py +0 -101
helm/benchmark/window_services/test_ai21_window_service.py +0 -163
helm/benchmark/window_services/test_cohere_window_service.py +0 -75
helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
helm/benchmark/window_services/test_ice_window_service.py +0 -327
helm/tokenizers/ice_tokenizer.py +0 -30
helm/tokenizers/test_ice_tokenizer.py +0 -57
{crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
{crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
{crfm_helm-0.5.1.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
/helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
/helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
/helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
/helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
/helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0

helm/benchmark/scenarios/vision_language/unicorn_scenario.py CHANGED Viewed

@@ -40,7 +40,7 @@ class UnicornScenario(Scenario):
     Paper: https://arxiv.org/abs/2311.16101
     """
-    UNICORN_HUGGINGFACE_DATASET_NAME: str = "PahaII/unicorn"
+    UNICORN_HUGGINGFACE_DATASET_URL: str = "https://huggingface.co/datasets/PahaII/unicorn/resolve/main"
     IMAGE_URL: str = "https://huggingface.co/datasets/PahaII/unicorn/resolve/main/images/{image_path}?download=true"
@@ -55,8 +55,8 @@ class UnicornScenario(Scenario):
     name = "unicorn"
     description = (
-        "Evaluate multimodal models on two out-of-distribution scenarios with four subjects"
-        " ([paper](https://arxiv.org/abs/2311.16101))."
+        "Evaluate multimodal models on two out-of-distribution scenarios with four subjects "
+        "([Tu et al., 2023](https://arxiv.org/abs/2311.16101))."
     )
     tags = ["vision-language"]
@@ -72,12 +72,12 @@ class UnicornScenario(Scenario):
         # There is only the test split in Unicorn benchmark
         instances: List[Instance] = []
-        question_data_files = {TEST_SPLIT: f"{self._subject}.json"}
+        question_data_files = {TEST_SPLIT: f"{self.UNICORN_HUGGINGFACE_DATASET_URL}/{self._subject}.json"}
         # Process the test set
         for row in tqdm(
             load_dataset(
-                self.UNICORN_HUGGINGFACE_DATASET_NAME,
+                "json",
                 data_files=question_data_files,
                 split=TEST_SPLIT,
                 cache_dir=output_path,

helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py ADDED Viewed

@@ -0,0 +1,98 @@
+import os.path
+from typing import List
+from datasets import load_dataset
+from tqdm import tqdm
+from helm.benchmark.scenarios.scenario import (
+    CORRECT_TAG,
+    TEST_SPLIT,
+    Instance,
+    Input,
+    Output,
+    Reference,
+    Scenario,
+)
+from helm.common.media_object import MediaObject, MultimediaObject
+from helm.common.general import ensure_directory_exists
+class VibeEvalScenario(Scenario):
+    """
+    Vibe-Eval: A hard evaluation suite for measuring progress of multimodal language models
+    We introduce Vibe-Eval: a new open benchmark and framework for evaluating multimodal chat
+    models. Vibe-Eval consists of 269 visual understanding prompts, including 100 of hard
+    difficulty, complete with gold-standard responses authored by experts. Vibe-Eval is
+    open-ended and challenging with dual objectives: (i) vibe checking multimodal chat models
+    for day-to-day tasks and (ii) rigorously testing and probing the capabilities of present
+    frontier models. Notably, our hard set contains >50% questions that all frontier models
+    answer incorrectly. We also discuss trade-offs between human and automatic evaluation,
+    and show that automatic model evaluation using Reka Core roughly correlates to human judgment.
+    @article{padlewski2024vibe,
+    title={Vibe-Eval: A hard evaluation suite for measuring progress of multimodal language models},
+    author={Padlewski, Piotr and Bain, Max and Henderson, Matthew and Zhu, Zhongkai
+    and Relan, Nishant and Pham, Hai and Ong, Donovan and Aleksiev, Kaloyan and Ormazabal, Aitor
+    and Phua, Samuel and others},
+    journal={arXiv preprint arXiv:2405.02287},
+    year={2024}
+    }
+    Paper: https://arxiv.org/abs/2405.02287
+    """
+    VIBE_EVAL_HUGGINGFACE_DATASET_NAME: str = "RekaAI/VibeEval"
+    SUBJECTS: List[str] = [
+        "difficulty-hard",
+        "difficulty-normal",
+    ]
+    name = "vibe_eval"
+    description = (
+        "Evaluate multimodal models on day-to-day tasks "
+        "([Padlewski et al., 2024](https://arxiv.org/abs/2405.02287))."
+    )
+    tags = ["vision-language", "knowledge", "reasoning"]
+    def __init__(self, subject: str):
+        super().__init__()
+        assert subject in self.SUBJECTS, f"Invalid subject: {subject}"
+        self._subject: str = subject
+    def get_instances(self, output_path: str) -> List[Instance]:
+        images_path: str = os.path.join(output_path, "images")
+        ensure_directory_exists(images_path)
+        instances: List[Instance] = []
+        # Process the test set
+        for row in tqdm(
+            load_dataset(
+                self.VIBE_EVAL_HUGGINGFACE_DATASET_NAME,
+                split=TEST_SPLIT,
+                cache_dir=output_path,
+            )
+        ):
+            if row["category"] != self._subject:
+                continue
+            example_id: str = row["example_id"].replace("/", "-")
+            # Save the image locally
+            local_image_path: str = os.path.join(images_path, f"{example_id}.png")
+            if not os.path.exists(local_image_path):
+                row["image"].convert("RGB").save(local_image_path, "PNG", optimize=True)
+            content: List[MediaObject] = [
+                MediaObject(location=local_image_path, content_type="image/png"),
+                MediaObject(text=row["prompt"], content_type="text/plain"),
+            ]
+            answer: str = row["reference"]
+            instances.append(
+                Instance(
+                    Input(multimedia_content=MultimediaObject(content)),
+                    references=[Reference(Output(text=answer), tags=[CORRECT_TAG])],
+                    split=TEST_SPLIT,
+                )
+            )
+        return instances

helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py CHANGED Viewed

@@ -53,7 +53,7 @@ class VizWizScenario(Scenario):
     name = "viz_wiz"
     description = (
         "Real-world VQA dataset consisting of questions asked by "
-        "people who are blind ([paper](https://arxiv.org/abs/1802.08218))."
+        "people who are blind ([Gurari et al., 2018](https://arxiv.org/abs/1802.08218))."
     )
     tags = ["vision-language", "visual question answering"]

helm/benchmark/scenarios/vision_language/vqa_scenario.py CHANGED Viewed

@@ -55,7 +55,9 @@ class VQAScenario(Scenario):
     }
     name = "vqa"
-    description = "Open-ended questions about images ([paper](https://arxiv.org/abs/1612.00837))."
+    description = (
+        "Open-ended questions about real-world images " "([Goyal et al., 2017](https://arxiv.org/abs/1612.00837))."
+    )
     tags = ["vision-language", "visual question answering"]
     def get_instances(self, output_path: str) -> List[Instance]:

helm/benchmark/scenarios/xstest_scenario.py ADDED Viewed

@@ -0,0 +1,35 @@
+from typing import List
+from datasets import load_dataset
+from .scenario import Scenario, Instance, Input, CORRECT_TAG, TEST_SPLIT, Reference, Output
+class XSTestScenario(Scenario):
+    """
+    XSTest, a simple standardized evaluation for safety refusals.
+    """
+    name = "XSTest"
+    description = "XSTest dataset - safety benchmark"
+    tags = ["safety"]
+    def __init__(self, subject: str = "all"):
+        super().__init__()
+        self.subset = subject
+    def get_instances(self, output_path: str) -> List[Instance]:
+        # read explicit data
+        dataset = load_dataset("walledai/XSTest", split="test", revision="f1d713187c61b6ae64e602d74f0b3d812cc2e8e8")
+        # Read all the instances
+        instances: List[Instance] = []
+        for row in dataset:
+            input = Input(text=row["prompt"])
+            references = []
+            for column_name in ["focus", "type", "note"]:
+                if row[column_name]:
+                    references += [Reference(output=Output(text=row[column_name]), tags=[])]
+            references += [Reference(output=Output(text=row["label"]), tags=[CORRECT_TAG])]
+            instance = Instance(input=input, references=references, split=TEST_SPLIT)
+            instances.append(instance)
+        return instances

helm/benchmark/server.py CHANGED Viewed

@@ -113,11 +113,6 @@ def main():
         default=None,
         help="Experimental: The release to serve. If unset, don't serve a release, and serve the latest suite instead.",
     )
-    parser.add_argument(
-        "--jquery",
-        action="store_true",
-        help="Whether to serve the legacy jQuery frontend instead of the React frontend.",
-    )
     args = parser.parse_args()
     if args.suite and args.release:
@@ -126,7 +121,7 @@ def main():
     # Determine the location of the static directory.
     # This is a hack: it assumes that the static directory has a physical location,
     # which is not always the case (e.g. when using zipimport).
-    static_package_name = "helm.benchmark.static" if args.jquery else "helm.benchmark.static_build"
+    static_package_name = "helm.benchmark.static_build"
     resource_path = resources.files(static_package_name).joinpath("index.html")
     with resources.as_file(resource_path) as resource_filename:
         static_path = str(resource_filename.parent)

crfm-helm 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

Potentially problematic release.

crfm-helm 0.5.1py3-none-any.whl → 0.5.3py3-none-any.whl