PyPI - crfm-helm - Versions diffs - 0.5.3__py3-none-any.whl → 0.5.5__py3-none-any.whl - Mend - Supply Chain Defender

crfm-helm 0.5.3 (py3-none-any.whl) → 0.5.5 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (606) hide show

helm/benchmark/scenarios/echr_judgment_classification_scenario.py ADDED Viewed

@@ -0,0 +1,113 @@
+from typing import List, Optional
+import json
+import os
+import re
+from helm.benchmark.scenarios.scenario import (
+    Scenario,
+    Instance,
+    Reference,
+    TRAIN_SPLIT,
+    VALID_SPLIT,
+    TEST_SPLIT,
+    CORRECT_TAG,
+    Input,
+    Output,
+)
+from helm.common.general import ensure_file_downloaded, ensure_directory_exists
+class EchrJudgeScenario(Scenario):
+    """The "Binary Violation" Classification task from the paper Neural Legal Judgment Prediction in English [(Chalkidis et al., 2019)](https://arxiv.org/pdf/1906.02059.pdf).
+    The task is to analyze the description of a legal case from the European Court of Human Rights (ECHR),
+    and classify it as positive if any human rights article or protocol has been violated and negative otherwise.
+    The case text can be very long, which sometimes results in incorrect model output
+    when using zero-shot predictions in many cases.
+    Therefore, have added two trivial cases to the instructions part.
+    Example Prompt:
+        Is the following case a violation of human rights?  (Instructions)
+        Case: Human rights have not been violated.          (Trivial No case in instructions)
+        Answer: No
+        Case: Human rights have been violated.              (Trivial Yes case in instructions)
+        Answer: Yes
+        Case: <TEXT>                                        (In-context examples, if possible)
+        Answer: <Label>                                     (Label is correct answer, Yes or No)
+        ...
+        Case: <TEXT>                                        (Target input text)
+        Answer: <Output>                                    (Output ::= Yes | No)
+    """  # noqa: E501
+    # Names of the tasks we support
+    name = "echr_judgment_classification"
+    description = 'The "Binary Violation" Classification task from the paper Neural Legal Judgment Prediction in English [(Chalkidis et al., 2019)](https://arxiv.org/pdf/1906.02059.pdf).'  # noqa: E501
+    tags = ["classification", "judgement", "legal"]
+    # Dataset file name
+    _DATASET_URL = "https://archive.org/download/ECHR-ACL2019/ECHR_Dataset.zip"
+    # Answer labels
+    YES_ANSWER = "Yes"
+    NO_ANSWER = "No"
+    # Prompt constants (used in adapter)
+    PROMPT_INPUT = "Case"
+    PROMPT_OUTPUT = "Answer"
+    YES_EX = f"\n\n{PROMPT_INPUT}: Human rights have been violated.\n{PROMPT_OUTPUT}: {YES_ANSWER}"
+    NO_EX = f"\n\n{PROMPT_INPUT}: Human rights have not been violated.\n{PROMPT_OUTPUT}: {NO_ANSWER}"
+    INST_EX = f"{NO_EX}{YES_EX}"
+    PROMPT_INST = "Is the following case a violation of human rights?"  # Prompt for instructions
+    PROMPT_INST_WITH_EX = f"{PROMPT_INST}{INST_EX}"  # Prompt for instructions with trivial examples
+    def __init__(self, filter_max_length: Optional[int] = None):
+        """
+        Args:
+            filter_max_length: Int indicating maximum length for documents. Documents longer than
+                               train_filter_max_length tokens (using whitespace tokenization)
+                               will be filtered out.
+        """
+        super().__init__()
+        self.filter_max_length = filter_max_length
+    def count_words(self, text: str) -> int:
+        """Returns the number of words in the text"""
+        return len(re.split(r"\W+", text))
+    def get_instances(self, output_path: str) -> List[Instance]:
+        data_dir = os.path.join(output_path, "data")
+        ensure_directory_exists(data_dir)
+        file_name = self._DATASET_URL.split("/")[-1]
+        download_directory_path = os.path.join(data_dir, self._DATASET_URL)
+        ensure_file_downloaded(
+            source_url=self._DATASET_URL,
+            target_path=download_directory_path,
+            unpack=True,
+            unpack_type="unzip",
+        )
+        source_split_to_helm_split = {"EN_train": TRAIN_SPLIT, "EN_dev": VALID_SPLIT, "EN_test": TEST_SPLIT}
+        instances: List[Instance] = []
+        for source_split, helm_split in source_split_to_helm_split.items():
+            target_data_dir = os.path.join(download_directory_path, source_split)
+            for file_name in os.listdir(target_data_dir):
+                if not file_name.endswith(".json"):
+                    continue
+                file_path = os.path.join(target_data_dir, file_name)
+                with open(file_path, "r") as f:
+                    raw_data = json.load(f)
+                input_text = " ".join(raw_data["TEXT"])
+                if self.filter_max_length is not None and self.count_words(input_text) > self.filter_max_length:
+                    continue
+                answer = self.YES_ANSWER if len(raw_data["VIOLATED_ARTICLES"]) > 0 else self.NO_ANSWER
+                correct_reference = Reference(Output(text=answer), tags=[CORRECT_TAG])
+                instances.append(Instance(input=Input(input_text), references=[correct_reference], split=helm_split))
+        return instances

helm/benchmark/scenarios/ehr_sql_scenario.py ADDED Viewed

@@ -0,0 +1,131 @@
+import os
+import json
+from typing import List
+from helm.benchmark.scenarios.scenario import (
+    CORRECT_TAG,
+    TRAIN_SPLIT,
+    TEST_SPLIT,
+    Input,
+    Instance,
+    Output,
+    Reference,
+    Scenario,
+)
+from helm.common.general import ensure_directory_exists, ensure_file_downloaded
+class EhrSqlScenario(Scenario):
+    """
+    Scenario for the EHR SQL dataset.
+    - Downloads and sets up the EHR SQL dataset.
+    - Ensures the `eicu.sqlite` database is available for evaluation.
+    - Extracts schema from `eicu.sql` to pass it to the LLM.
+    - Includes `value` field as alternative ground truth result.
+    """
+    BASE_URL: str = (
+        "https://raw.githubusercontent.com/glee4810/EHRSQL/e172ec8e61391ecae2d872c8d0ba02a622222f54/dataset/ehrsql/eicu"
+    )
+    DB_URL: str = (
+        "https://github.com/raulista1997/benchmarkdata/raw/c4c252443fa9c52afb6960f53e51be278639bea2/eicu.sqlite"
+    )
+    SQL_SCHEMA_URL: str = (
+        "https://raw.githubusercontent.com/glee4810/EHRSQL/"
+        "e172ec8e61391ecae2d872c8d0ba02a622222f54/dataset/ehrsql/eicu/eicu.sql"
+    )
+    name = "ehr_sql"
+    description = "Given a natural language instruction, generate an SQL query that would be used in clinical research."
+    tags = ["sql", "medical", "reasoning"]
+    def setup_database(self, output_path: str) -> str:
+        """Ensure the SQLite database is downloaded and accessible."""
+        ensure_directory_exists(output_path)
+        db_path = os.path.join(output_path, "eicu.sqlite")
+        ensure_file_downloaded(self.DB_URL, db_path)
+        return db_path
+    def download_sql_schema(self, output_path: str) -> str:
+        """Download the SQL schema file containing table definitions."""
+        ensure_directory_exists(output_path)
+        schema_path = os.path.join(output_path, "eicu.sql")
+        ensure_file_downloaded(self.SQL_SCHEMA_URL, schema_path)
+        return schema_path
+    def extract_schema(self, schema_path: str) -> str:
+        """Extracts and returns `CREATE TABLE` statements from the SQL schema file."""
+        schema_text = []
+        with open(schema_path, "r", encoding="utf-8") as f:
+            lines = f.readlines()
+        inside_create_table = False
+        current_table_def = []
+        for line in lines:
+            line = line.strip()
+            if line.startswith("CREATE TABLE"):
+                inside_create_table = True
+                current_table_def = [line]
+            elif inside_create_table:
+                current_table_def.append(line)
+                if line.endswith(";"):  # End of table definition
+                    inside_create_table = False
+                    schema_text.append("\n".join(current_table_def))
+        return "\n\n".join(schema_text)
+    def download_json(self, split_name: str, output_path: str) -> str:
+        """Download a specific JSON file for the given split (train, valid, test)."""
+        json_path = os.path.join(output_path, f"ehrsql_{split_name}.json")
+        json_url = f"{self.BASE_URL}/{split_name}.json"
+        ensure_file_downloaded(json_url, json_path)
+        return json_path
+    def process_json(self, json_path: str, schema_prompt: str, split: str) -> List[Instance]:
+        """Read and process the JSON file to generate HELM instances."""
+        instances: List[Instance] = []
+        with open(json_path, "r", encoding="utf-8") as f:
+            raw_data = json.load(f)
+            for entry in raw_data:
+                if "question" in entry and "query" in entry:
+                    is_impossible = entry.get("is_impossible", False)
+                    input_text = f"-- Database Schema:\n{schema_prompt}\n\n-- \
+                    Question:\n{entry['question']}\n\n"
+                    instance = Instance(
+                        input=Input(text=input_text),
+                        references=[Reference(Output(text=entry["query"]), tags=[CORRECT_TAG])],
+                        extra_data={
+                            "db_path": "eicu.sqlite",
+                            "value": entry.get("value", {}),
+                            "is_impossible": is_impossible,
+                        },
+                        split=split,
+                    )
+                    instances.append(instance)
+        return instances
+    def get_instances(self, output_path: str) -> List[Instance]:
+        """Download and process the dataset to generate HELM instances."""
+        ensure_directory_exists(output_path)
+        # Ensure SQLite database is downloaded
+        self.setup_database(output_path)
+        # Ensure schema file is downloaded and extract schema
+        schema_path = self.download_sql_schema(output_path)
+        schema_prompt = self.extract_schema(schema_path)
+        # Process dataset
+        splits = {"train": TRAIN_SPLIT, "valid": TEST_SPLIT, "test": TEST_SPLIT}
+        instances: List[Instance] = []
+        for split_name, helm_split in splits.items():
+            json_path = self.download_json(split_name, output_path)
+            instances.extend(self.process_json(json_path, schema_prompt, helm_split))
+        return instances