crfm-helm 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of crfm-helm might be problematic.

Files changed (333)
  1. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/METADATA +7 -77
  2. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/RECORD +315 -282
  3. helm/benchmark/adaptation/adapter_spec.py +10 -0
  4. helm/benchmark/adaptation/adapters/multimodal/multiple_choice_joint_multimodal_adapter.py +11 -3
  5. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +11 -8
  6. helm/benchmark/annotation/aci_bench_annotator.py +11 -22
  7. helm/benchmark/annotation/alrage_annotator.py +90 -0
  8. helm/benchmark/annotation/chw_care_plan_annotator.py +10 -21
  9. helm/benchmark/annotation/dischargeme_annotator.py +11 -22
  10. helm/benchmark/annotation/med_dialog_annotator.py +11 -22
  11. helm/benchmark/annotation/medalign_annotator.py +11 -22
  12. helm/benchmark/annotation/medi_qa_annotator.py +11 -22
  13. helm/benchmark/annotation/medication_qa_annotator.py +11 -22
  14. helm/benchmark/annotation/mental_health_annotator.py +11 -22
  15. helm/benchmark/annotation/mimic_bhc_annotator.py +11 -22
  16. helm/benchmark/annotation/mimic_rrs_annotator.py +11 -22
  17. helm/benchmark/annotation/model_as_judge.py +23 -18
  18. helm/benchmark/annotation/mtsamples_procedures_annotator.py +11 -22
  19. helm/benchmark/annotation/mtsamples_replicate_annotator.py +11 -22
  20. helm/benchmark/annotation/starr_patient_instructions_annotator.py +11 -22
  21. helm/benchmark/metrics/air_bench_metrics.py +3157 -1
  22. helm/benchmark/metrics/alrage_metric.py +35 -0
  23. helm/benchmark/metrics/basic_metrics.py +267 -2
  24. helm/benchmark/metrics/bbq_metrics.py +12 -0
  25. helm/benchmark/metrics/classification_metrics.py +19 -1
  26. helm/benchmark/metrics/conv_fin_qa_calc_metrics.py +12 -1
  27. helm/benchmark/metrics/dry_run_metrics.py +30 -1
  28. helm/benchmark/metrics/efficiency_metrics.py +74 -0
  29. helm/benchmark/metrics/ehr_sql_metrics.py +57 -1
  30. helm/benchmark/metrics/evaluate_reference_metrics.py +311 -0
  31. helm/benchmark/metrics/gpqa_chain_of_thought_metric.py +13 -1
  32. helm/benchmark/metrics/helpdesk_call_summarization_metrics.py +13 -1
  33. helm/benchmark/metrics/ifeval_metrics.py +13 -1
  34. helm/benchmark/metrics/instruction_following_critique_metrics.py +41 -1
  35. helm/benchmark/metrics/kpi_edgar_metrics.py +21 -0
  36. helm/benchmark/metrics/language_modeling_metrics.py +13 -1
  37. helm/benchmark/metrics/live_qa_metrics.py +13 -1
  38. helm/benchmark/metrics/llm_jury_metrics.py +13 -1
  39. helm/benchmark/metrics/medcalc_bench_metrics.py +14 -1
  40. helm/benchmark/metrics/medec_metrics.py +25 -2
  41. helm/benchmark/metrics/metric.py +25 -0
  42. helm/benchmark/metrics/mimiciv_billing_code_metrics.py +32 -1
  43. helm/benchmark/metrics/omni_math_metrics.py +13 -1
  44. helm/benchmark/metrics/safety_metrics.py +13 -1
  45. helm/benchmark/metrics/seahelm_metrics.py +14 -1
  46. helm/benchmark/metrics/summac/model_summac.py +2 -2
  47. helm/benchmark/metrics/summarization_metrics.py +129 -1
  48. helm/benchmark/metrics/toxicity_metrics.py +31 -1
  49. helm/benchmark/metrics/ultra_suite_asr_classification_metrics.py +52 -0
  50. helm/benchmark/metrics/wildbench_metrics.py +21 -1
  51. helm/benchmark/presentation/run_display.py +13 -3
  52. helm/benchmark/presentation/run_entry.py +2 -2
  53. helm/benchmark/presentation/schema.py +5 -22
  54. helm/benchmark/presentation/summarize.py +180 -11
  55. helm/benchmark/presentation/taxonomy_info.py +20 -0
  56. helm/benchmark/run.py +1 -1
  57. helm/benchmark/run_expander.py +4 -0
  58. helm/benchmark/run_specs/arabic_run_specs.py +140 -16
  59. helm/benchmark/run_specs/bluex_run_specs.py +1 -1
  60. helm/benchmark/run_specs/classic_run_specs.py +2 -2
  61. helm/benchmark/run_specs/long_context_run_specs.py +2 -2
  62. helm/benchmark/run_specs/medhelm/__init__.py +0 -0
  63. helm/benchmark/run_specs/medhelm/benchmark_config.py +219 -0
  64. helm/benchmark/run_specs/medhelm_run_specs.py +362 -52
  65. helm/benchmark/run_specs/speech_disorder_audio_run_specs.py +6 -2
  66. helm/benchmark/scenarios/aci_bench_scenario.py +23 -0
  67. helm/benchmark/scenarios/air_bench_scenario.py +21 -0
  68. helm/benchmark/scenarios/alrage_scenario.py +54 -0
  69. helm/benchmark/scenarios/anthropic_hh_rlhf_scenario.py +23 -1
  70. helm/benchmark/scenarios/anthropic_red_team_scenario.py +12 -1
  71. helm/benchmark/scenarios/arabic_exams_scenario.py +114 -0
  72. helm/benchmark/scenarios/arabic_mmlu_scenario.py +8 -4
  73. helm/benchmark/scenarios/aratrust_scenario.py +19 -0
  74. helm/benchmark/scenarios/audio_language/ultra_suite_asr_classification_scenario.py +24 -54
  75. helm/benchmark/scenarios/audio_language/ultra_suite_asr_transcription_scenario.py +19 -48
  76. helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py +22 -61
  77. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py +21 -29
  78. helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py +21 -60
  79. helm/benchmark/scenarios/babi_qa_scenario.py +15 -0
  80. helm/benchmark/scenarios/banking77_scenario.py +21 -0
  81. helm/benchmark/scenarios/bbq_scenario.py +15 -0
  82. helm/benchmark/scenarios/best_chatgpt_prompts.yaml +473 -0
  83. helm/benchmark/scenarios/bird_sql_scenario.py +18 -0
  84. helm/benchmark/scenarios/bluex_scenario.py +6 -2
  85. helm/benchmark/scenarios/bold_scenario.py +15 -0
  86. helm/benchmark/scenarios/boolq_scenario.py +20 -0
  87. helm/benchmark/scenarios/chw_care_plan_scenario.py +23 -0
  88. helm/benchmark/scenarios/civil_comments_scenario.py +13 -0
  89. helm/benchmark/scenarios/clear_scenario.py +23 -0
  90. helm/benchmark/scenarios/cleva_scenario.py +479 -0
  91. helm/benchmark/scenarios/code_scenario.py +28 -0
  92. helm/benchmark/scenarios/commonsense_scenario.py +32 -0
  93. helm/benchmark/scenarios/compositional_instructions.yaml +70 -0
  94. helm/benchmark/scenarios/conv_fin_qa_calc_scenario.py +21 -0
  95. helm/benchmark/scenarios/copyright_scenario.py +35 -1
  96. helm/benchmark/scenarios/cti_to_mitre_scenario.py +21 -0
  97. helm/benchmark/scenarios/czech_bank_qa_scenario.py +18 -0
  98. helm/benchmark/scenarios/decodingtrust_adv_demonstration_scenario.py +22 -1
  99. helm/benchmark/scenarios/decodingtrust_adv_robustness_scenario.py +23 -1
  100. helm/benchmark/scenarios/decodingtrust_fairness_scenario.py +22 -1
  101. helm/benchmark/scenarios/decodingtrust_machine_ethics_scenario.py +21 -1
  102. helm/benchmark/scenarios/decodingtrust_ood_robustness_scenario.py +13 -0
  103. helm/benchmark/scenarios/decodingtrust_privacy_scenario.py +13 -1
  104. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +13 -1
  105. helm/benchmark/scenarios/decodingtrust_toxicity_prompts_scenario.py +13 -1
  106. helm/benchmark/scenarios/dischargeme_scenario.py +24 -0
  107. helm/benchmark/scenarios/disinformation_scenario.py +22 -0
  108. helm/benchmark/scenarios/dyck_language_scenario.py +15 -0
  109. helm/benchmark/scenarios/ehrshot_scenario.py +22 -0
  110. helm/benchmark/scenarios/enem_challenge_scenario.py +19 -0
  111. helm/benchmark/scenarios/entity_data_imputation_scenario.py +14 -0
  112. helm/benchmark/scenarios/entity_matching_scenario.py +14 -0
  113. helm/benchmark/scenarios/fin_qa_scenario.py +20 -0
  114. helm/benchmark/scenarios/financebench_scenario.py +21 -0
  115. helm/benchmark/scenarios/financial_phrasebank_scenario.py +21 -0
  116. helm/benchmark/scenarios/gold_commodity_news_scenario.py +21 -0
  117. helm/benchmark/scenarios/gpqa_scenario.py +18 -0
  118. helm/benchmark/scenarios/grammar_scenario.py +20 -1
  119. helm/benchmark/scenarios/gsm_scenario.py +21 -0
  120. helm/benchmark/scenarios/harm_bench_gcg_transfer_scenario.py +12 -1
  121. helm/benchmark/scenarios/harm_bench_scenario.py +12 -1
  122. helm/benchmark/scenarios/headqa_scenario.py +22 -0
  123. helm/benchmark/scenarios/helpdesk_call_summarization_scenario.py +13 -0
  124. helm/benchmark/scenarios/ice_scenario.py +21 -1
  125. helm/benchmark/scenarios/ifeval_scenario.py +18 -0
  126. helm/benchmark/scenarios/imdb_scenario.py +15 -0
  127. helm/benchmark/scenarios/infinite_bench_en_mc_scenario.py +21 -0
  128. helm/benchmark/scenarios/infinite_bench_en_sum_scenario.py +19 -0
  129. helm/benchmark/scenarios/koala_scenario.py +21 -1
  130. helm/benchmark/scenarios/kpi_edgar_scenario.py +21 -0
  131. helm/benchmark/scenarios/legal_contract_summarization_scenario.py +20 -0
  132. helm/benchmark/scenarios/legal_summarization_scenario.py +50 -0
  133. helm/benchmark/scenarios/legal_support_scenario.py +13 -0
  134. helm/benchmark/scenarios/legalbench_scenario.py +19 -0
  135. helm/benchmark/scenarios/lex_glue_scenario.py +11 -0
  136. helm/benchmark/scenarios/lextreme_scenario.py +11 -0
  137. helm/benchmark/scenarios/lsat_qa_scenario.py +14 -0
  138. helm/benchmark/scenarios/madinah_qa_scenario.py +73 -0
  139. helm/benchmark/scenarios/math_scenario.py +33 -0
  140. helm/benchmark/scenarios/mbzuai_human_translated_arabic_mmlu.py +68 -0
  141. helm/benchmark/scenarios/med_dialog_scenario.py +32 -1
  142. helm/benchmark/scenarios/med_mcqa_scenario.py +14 -0
  143. helm/benchmark/scenarios/med_qa_scenario.py +20 -0
  144. helm/benchmark/scenarios/medalign_scenario.py +23 -0
  145. helm/benchmark/scenarios/medbullets_scenario.py +22 -0
  146. helm/benchmark/scenarios/medcalc_bench_scenario.py +22 -0
  147. helm/benchmark/scenarios/medec_scenario.py +23 -0
  148. helm/benchmark/scenarios/medhallu_scenario.py +23 -0
  149. helm/benchmark/scenarios/medhelm/__init__.py +0 -0
  150. helm/benchmark/scenarios/medhelm/judges.yaml +14 -0
  151. helm/benchmark/scenarios/medhelm_configurable_scenario.py +101 -0
  152. helm/benchmark/scenarios/medi_qa_scenario.py +24 -1
  153. helm/benchmark/scenarios/medication_qa_scenario.py +31 -1
  154. helm/benchmark/scenarios/mental_health_scenario.py +23 -0
  155. helm/benchmark/scenarios/mimic_bhc_scenario.py +24 -0
  156. helm/benchmark/scenarios/mimic_rrs_scenario.py +23 -0
  157. helm/benchmark/scenarios/mimiciv_billing_code_scenario.py +22 -0
  158. helm/benchmark/scenarios/mmlu_pro_scenario.py +18 -0
  159. helm/benchmark/scenarios/mmlu_scenario.py +21 -0
  160. helm/benchmark/scenarios/msmarco_scenario.py +30 -0
  161. helm/benchmark/scenarios/mtsamples_procedures_scenario.py +22 -0
  162. helm/benchmark/scenarios/mtsamples_replicate_scenario.py +22 -0
  163. helm/benchmark/scenarios/n2c2_ct_matching_scenario.py +20 -0
  164. helm/benchmark/scenarios/narrativeqa_scenario.py +19 -0
  165. helm/benchmark/scenarios/natural_qa_scenario.py +32 -0
  166. helm/benchmark/scenarios/omni_math_scenario.py +18 -0
  167. helm/benchmark/scenarios/open_assistant_scenario.py +22 -0
  168. helm/benchmark/scenarios/openai_mrcr_scenario.py +15 -0
  169. helm/benchmark/scenarios/pubmed_qa_scenario.py +22 -0
  170. helm/benchmark/scenarios/quac_scenario.py +14 -0
  171. helm/benchmark/scenarios/race_based_med_scenario.py +23 -0
  172. helm/benchmark/scenarios/raft_scenario.py +15 -0
  173. helm/benchmark/scenarios/real_toxicity_prompts_scenario.py +14 -1
  174. helm/benchmark/scenarios/ruler_qa_scenarios.py +40 -0
  175. helm/benchmark/scenarios/scenario.py +31 -0
  176. helm/benchmark/scenarios/seahelm_scenario.py +348 -0
  177. helm/benchmark/scenarios/self_instruct_scenario.py +29 -1
  178. helm/benchmark/scenarios/shc_bmt_scenario.py +22 -0
  179. helm/benchmark/scenarios/shc_cdi_scenario.py +20 -0
  180. helm/benchmark/scenarios/shc_conf_scenario.py +23 -0
  181. helm/benchmark/scenarios/shc_ent_scenario.py +21 -0
  182. helm/benchmark/scenarios/shc_gip_scenario.py +20 -0
  183. helm/benchmark/scenarios/shc_privacy_scenario.py +22 -0
  184. helm/benchmark/scenarios/shc_proxy_scenario.py +22 -0
  185. helm/benchmark/scenarios/shc_ptbm_scenario.py +23 -0
  186. helm/benchmark/scenarios/shc_sequoia_scenario.py +21 -0
  187. helm/benchmark/scenarios/simple_safety_tests_scenario.py +12 -1
  188. helm/benchmark/scenarios/situation_prompts.yaml +49 -0
  189. helm/benchmark/scenarios/spider_scenario.py +18 -0
  190. helm/benchmark/scenarios/starr_patient_instructions_scenario.py +22 -0
  191. helm/benchmark/scenarios/summarization_scenario.py +37 -0
  192. helm/benchmark/scenarios/synthetic_efficiency_scenario.py +22 -1
  193. helm/benchmark/scenarios/synthetic_reasoning_natural_scenario.py +13 -0
  194. helm/benchmark/scenarios/test_alrage_scenario.py +23 -0
  195. helm/benchmark/scenarios/test_arabic_exams_scenario.py +21 -0
  196. helm/benchmark/scenarios/test_aratrust_scenario.py +1 -1
  197. helm/benchmark/scenarios/test_bluex_scenario.py +2 -2
  198. helm/benchmark/scenarios/thai_exam_scenario.py +95 -0
  199. helm/benchmark/scenarios/the_pile_scenario.py +13 -1
  200. helm/benchmark/scenarios/truthful_qa_scenario.py +14 -0
  201. helm/benchmark/scenarios/twitter_aae_scenario.py +20 -1
  202. helm/benchmark/scenarios/vicuna_scenario.py +21 -1
  203. helm/benchmark/scenarios/wikifact_scenario.py +20 -0
  204. helm/benchmark/scenarios/wildbench_scenario.py +18 -0
  205. helm/benchmark/scenarios/wmt_14_scenario.py +19 -0
  206. helm/benchmark/static/schema_arabic.yaml +55 -12
  207. helm/benchmark/static/schema_long_context.yaml +11 -30
  208. helm/benchmark/static/schema_medhelm.yaml +36 -0
  209. helm/benchmark/static/schema_slp.yaml +219 -0
  210. helm/benchmark/static_build/assets/audio-table-Dn5NMMeJ.png +0 -0
  211. helm/benchmark/static_build/assets/index-oIeiQW2g.css +1 -0
  212. helm/benchmark/static_build/assets/index-qOFpOyHb.js +10 -0
  213. helm/benchmark/static_build/assets/react-BteFIppM.js +85 -0
  214. helm/benchmark/static_build/assets/recharts-DxuQtTOs.js +97 -0
  215. helm/benchmark/static_build/assets/tremor-DR4fE7ko.js +10 -0
  216. helm/benchmark/static_build/index.html +5 -6
  217. helm/clients/ai21_client.py +2 -0
  218. helm/clients/aleph_alpha_client.py +2 -0
  219. helm/clients/anthropic_client.py +7 -1
  220. helm/clients/audio_language/diva_llama_client.py +2 -0
  221. helm/clients/audio_language/llama_omni/arguments.py +61 -0
  222. helm/clients/audio_language/llama_omni/constants.py +9 -0
  223. helm/clients/audio_language/llama_omni/conversation.py +213 -0
  224. helm/clients/audio_language/llama_omni/model/__init__.py +0 -0
  225. helm/clients/audio_language/llama_omni/model/builder.py +88 -0
  226. helm/clients/audio_language/llama_omni/model/language_model/omni_speech2s_llama.py +190 -0
  227. helm/clients/audio_language/llama_omni/model/language_model/omni_speech_llama.py +118 -0
  228. helm/clients/audio_language/llama_omni/model/omni_speech_arch.py +249 -0
  229. helm/clients/audio_language/llama_omni/model/speech_encoder/builder.py +9 -0
  230. helm/clients/audio_language/llama_omni/model/speech_encoder/speech_encoder.py +27 -0
  231. helm/clients/audio_language/llama_omni/model/speech_generator/builder.py +9 -0
  232. helm/clients/audio_language/llama_omni/model/speech_generator/generation.py +622 -0
  233. helm/clients/audio_language/llama_omni/model/speech_generator/speech_generator.py +104 -0
  234. helm/clients/audio_language/llama_omni/model/speech_projector/builder.py +9 -0
  235. helm/clients/audio_language/llama_omni/model/speech_projector/speech_projector.py +27 -0
  236. helm/clients/audio_language/llama_omni/preprocess.py +295 -0
  237. helm/clients/audio_language/llama_omni/utils.py +202 -0
  238. helm/clients/audio_language/llama_omni_client.py +2 -1
  239. helm/clients/audio_language/qwen2_5_omni_client.py +2 -1
  240. helm/clients/audio_language/qwen2_audiolm_client.py +2 -1
  241. helm/clients/audio_language/qwen_audiolm_client.py +2 -1
  242. helm/clients/audio_language/qwen_omni/configuration_qwen2_5_omni.py +519 -0
  243. helm/clients/audio_language/qwen_omni/modeling_qwen2_5_omni.py +4308 -0
  244. helm/clients/audio_language/qwen_omni/processing_qwen2_5_omni.py +270 -0
  245. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/__init__.py +0 -0
  246. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/__init__.py +8 -0
  247. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/audio_process.py +56 -0
  248. helm/clients/audio_language/qwen_omni/qwen2_5_omni_utils/v2_5/vision_process.py +380 -0
  249. helm/clients/bedrock_client.py +2 -0
  250. helm/clients/cohere_client.py +3 -0
  251. helm/clients/google_client.py +2 -0
  252. helm/clients/http_model_client.py +2 -0
  253. helm/clients/huggingface_client.py +2 -1
  254. helm/clients/ibm_client.py +3 -1
  255. helm/clients/image_generation/adobe_vision_client.py +2 -0
  256. helm/clients/image_generation/aleph_alpha_image_generation_client.py +2 -0
  257. helm/clients/image_generation/cogview2/sr_pipeline/dsr_model.py +1 -1
  258. helm/clients/image_generation/cogview2_client.py +2 -1
  259. helm/clients/image_generation/dalle2_client.py +2 -0
  260. helm/clients/image_generation/dalle_mini_client.py +2 -1
  261. helm/clients/image_generation/deep_floyd_client.py +2 -0
  262. helm/clients/image_generation/huggingface_diffusers_client.py +2 -1
  263. helm/clients/image_generation/lexica_client.py +2 -0
  264. helm/clients/image_generation/mindalle/models/stage1/layers.py +2 -2
  265. helm/clients/image_generation/mindalle_client.py +2 -1
  266. helm/clients/image_generation/together_image_generation_client.py +2 -0
  267. helm/clients/megatron_client.py +2 -0
  268. helm/clients/mistral_client.py +2 -0
  269. helm/clients/moderation_api_client.py +2 -0
  270. helm/clients/openai_client.py +36 -20
  271. helm/clients/openai_responses_client.py +27 -3
  272. helm/clients/openrouter_client.py +31 -0
  273. helm/clients/palmyra_client.py +2 -1
  274. helm/clients/reka_client.py +2 -1
  275. helm/clients/stanfordhealthcare_azure_openai_client.py +2 -2
  276. helm/clients/stanfordhealthcare_http_model_client.py +2 -0
  277. helm/clients/test_openrouter_client.py +69 -0
  278. helm/clients/together_client.py +52 -11
  279. helm/clients/vertexai_client.py +12 -2
  280. helm/clients/vision_language/huggingface_vision2seq_client.py +2 -1
  281. helm/clients/vision_language/huggingface_vlm_client.py +2 -0
  282. helm/clients/vision_language/idefics_client.py +2 -1
  283. helm/clients/vision_language/open_flamingo_client.py +2 -1
  284. helm/clients/vision_language/paligemma_client.py +2 -1
  285. helm/clients/vision_language/palmyra_vision_client.py +2 -0
  286. helm/clients/vision_language/qwen2_vlm_client.py +2 -1
  287. helm/clients/vision_language/qwen_vlm_client.py +2 -1
  288. helm/clients/writer_client.py +2 -0
  289. helm/common/hierarchical_logger.py +20 -0
  290. helm/common/optional_dependencies.py +1 -1
  291. helm/common/test_general.py +4 -0
  292. helm/config/model_deployments.yaml +300 -1
  293. helm/config/model_metadata.yaml +302 -9
  294. helm/config/tokenizer_configs.yaml +92 -4
  295. helm/proxy/example_queries.py +8 -8
  296. helm/proxy/server.py +2 -1
  297. helm/proxy/static/index.css +4 -0
  298. helm/proxy/static/index.js +7 -1
  299. helm/benchmark/metrics/aci_bench_metrics.py +0 -14
  300. helm/benchmark/metrics/chw_care_plan_metrics.py +0 -14
  301. helm/benchmark/metrics/dischargeme_metrics.py +0 -14
  302. helm/benchmark/metrics/med_dialog_metrics.py +0 -14
  303. helm/benchmark/metrics/medalign_metrics.py +0 -14
  304. helm/benchmark/metrics/medi_qa_metrics.py +0 -14
  305. helm/benchmark/metrics/medication_qa_metrics.py +0 -14
  306. helm/benchmark/metrics/mental_health_metrics.py +0 -14
  307. helm/benchmark/metrics/mimic_bhc_metrics.py +0 -14
  308. helm/benchmark/metrics/mimic_rrs_metrics.py +0 -14
  309. helm/benchmark/metrics/mtsamples_procedures_metrics.py +0 -14
  310. helm/benchmark/metrics/mtsamples_replicate_metrics.py +0 -14
  311. helm/benchmark/metrics/starr_patient_instructions_metrics.py +0 -14
  312. helm/benchmark/static_build/assets/index-b9779128.css +0 -1
  313. helm/benchmark/static_build/assets/index-e439d5e1.js +0 -10
  314. helm/benchmark/static_build/assets/react-f82877fd.js +0 -85
  315. helm/benchmark/static_build/assets/recharts-4037aff0.js +0 -97
  316. helm/benchmark/static_build/assets/tremor-38a10867.js +0 -10
  317. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/WHEEL +0 -0
  318. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/entry_points.txt +0 -0
  319. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/licenses/LICENSE +0 -0
  320. {crfm_helm-0.5.7.dist-info → crfm_helm-0.5.9.dist-info}/top_level.txt +0 -0
  321. /helm/benchmark/static_build/assets/{air-overview-d2e6c49f.png → air-overview-DpBbyagA.png} +0 -0
  322. /helm/benchmark/static_build/assets/{crfm-logo-74391ab8.png → crfm-logo-Du4T1uWZ.png} +0 -0
  323. /helm/benchmark/static_build/assets/{heim-logo-3e5e3aa4.png → heim-logo-BJtQlEbV.png} +0 -0
  324. /helm/benchmark/static_build/assets/{helm-logo-simple-2ed5400b.png → helm-logo-simple-DzOhNN41.png} +0 -0
  325. /helm/benchmark/static_build/assets/{helm-safety-2907a7b6.png → helm-safety-COfndXuS.png} +0 -0
  326. /helm/benchmark/static_build/assets/{helmhero-28e90f4d.png → helmhero-D9TvmJsp.png} +0 -0
  327. /helm/benchmark/static_build/assets/{medhelm-overview-eac29843.png → medhelm-overview-CND0EIsy.png} +0 -0
  328. /helm/benchmark/static_build/assets/{medhelm-v1-overview-3ddfcd65.png → medhelm-v1-overview-Cu2tphBB.png} +0 -0
  329. /helm/benchmark/static_build/assets/{overview-74aea3d8.png → overview-BwypNWnk.png} +0 -0
  330. /helm/benchmark/static_build/assets/{process-flow-bd2eba96.png → process-flow-DWDJC733.png} +0 -0
  331. /helm/benchmark/static_build/assets/{vhelm-aspects-1437d673.png → vhelm-aspects-NiDQofvP.png} +0 -0
  332. /helm/benchmark/static_build/assets/{vhelm-framework-a1ca3f3f.png → vhelm-framework-NxJE4fdA.png} +0 -0
  333. /helm/benchmark/static_build/assets/{vhelm-model-8afb7616.png → vhelm-model-ypCL5Yvq.png} +0 -0
--- a/helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py
+++ b/helm/benchmark/scenarios/audio_language/ultra_suite_classification_scenario.py
@@ -1,7 +1,7 @@
-from typing import List, Tuple
+from typing import List
 import os
-import json

+from datasets import load_dataset
 from tqdm import tqdm

 from helm.benchmark.scenarios.scenario import (
@@ -14,41 +14,7 @@ from helm.benchmark.scenarios.scenario import (
     Output,
 )
 from helm.common.media_object import MediaObject, MultimediaObject
-from huggingface_hub import snapshot_download
-
-
-def find_audio_json_pairs(directory: str) -> List[Tuple[str, str]]:
-    """
-    Find all pairs of MP3 and JSON files in the given directory and its subdirectories.
-    Each pair consists of an MP3 file and its corresponding JSON file with the same base name.
-
-    Args:
-        directory: Path to the directory containing the files
-
-    Returns:
-        List of tuples where each tuple contains (mp3_path, json_path)
-    """
-    pairs = []
-
-    # Walk through all directories and subdirectories
-    for root, _, files in os.walk(directory):
-        # Get all MP3 files in current directory
-        mp3_files = [f for f in files if f.endswith(".mp3")]
-
-        for mp3_file in mp3_files:
-            base_name = os.path.splitext(mp3_file)[0]
-            json_file = f"{base_name}.json"
-
-            # Check if corresponding JSON file exists in the same directory
-            if json_file in files:
-                mp3_path = os.path.join(root, mp3_file)
-                json_path = os.path.join(root, json_file)
-                pairs.append((mp3_path, json_path))
-
-    if len(pairs) == 0:
-        raise ValueError(f"No pairs of MP3 and JSON files found in {directory}")
-
-    return pairs
+from helm.common.audio_utils import ensure_audio_file_exists_from_array


 class UltraSuiteClassificationScenario(Scenario):
@@ -72,44 +38,39 @@ class UltraSuiteClassificationScenario(Scenario):
         - Audio files (e.g., .mp3)
         - A JSON file with annotations containing 'answer' field
         """
+        audio_save_dir = os.path.join(output_path, "audio_files")
+        os.makedirs(audio_save_dir, exist_ok=True)

-        print("Downloading SAA-Lab/SLPHelmManualLabels dataset...")
-        data_path = snapshot_download(
-            repo_id="SAA-Lab/SLPHelmManualLabels",
-            repo_type="dataset",
-            revision="38c2d7dab831acf8ccff0ca6f6463d6a8a0184ed",
-        )
+        print("Downloading SAA-Lab/SLPHelmUltraSuitePlus dataset...")
+        dataset = load_dataset("SAA-Lab/SLPHelmUltraSuitePlus")

         instances: List[Instance] = []
         split: str = TEST_SPLIT

-        # Find all pairs of audio and JSON files
-        pairs = find_audio_json_pairs(data_path)
-        print(f"Num pairs: {len(pairs)}")
+        for idx, row in enumerate(tqdm(dataset["train"])):

-        for audio_path, json_path in tqdm(pairs):
             # Load the annotation
-            with open(json_path, "r") as f:
-                annotation = json.load(f)
+            label = row["disorder_class"]
+            transcription = row["transcription"]
+
+            unique_id = str(idx)
+            local_audio_name = f"{label}_{unique_id}.mp3"
+            local_audio_path = os.path.join(audio_save_dir, local_audio_name)
+            ensure_audio_file_exists_from_array(local_audio_path, row["audio"]["array"], row["audio"]["sampling_rate"])

-            # Get the correct answer and convert to label
-            answer = annotation["disorder_class"]
-            words = annotation["transcription"]
             # Create references for each option
             references: List[Reference] = []
-            correct_label = 0
-            for option in ["typically_developing", "speech_disorder"]:
-                reference = Reference(Output(text=option), tags=[CORRECT_TAG] if option == answer else [])
-                references.append(reference)
-                if option == answer:
-                    correct_label += 1
-            if correct_label == 0:
+            options = ["typically_developing", "speech_disorder"]
+            if label not in options:
                 continue
+            for option in options:
+                reference = Reference(Output(text=option), tags=[CORRECT_TAG] if option == label else [])
+                references.append(reference)

             # Create the input with audio and instruction
             content = [
-                MediaObject(content_type="audio/mpeg", location=audio_path),
-                MediaObject(content_type="text/plain", text=self.get_instruction(words)),
+                MediaObject(content_type="audio/mpeg", location=local_audio_path),
+                MediaObject(content_type="text/plain", text=self.get_instruction(transcription)),
             ]

             input = Input(multimedia_content=MultimediaObject(content))

--- a/helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py
+++ b/helm/benchmark/scenarios/audio_language/ultra_suite_disorder_breakdown_scenario.py
@@ -1,6 +1,7 @@
 from typing import List
-import json
+import os

+from datasets import load_dataset
 from tqdm import tqdm

 from helm.benchmark.scenarios.scenario import (
@@ -13,8 +14,7 @@ from helm.benchmark.scenarios.scenario import (
     Output,
 )
 from helm.common.media_object import MediaObject, MultimediaObject
-from huggingface_hub import snapshot_download
-from .ultra_suite_classification_scenario import find_audio_json_pairs
+from helm.common.audio_utils import ensure_audio_file_exists_from_array


 class UltraSuiteDisorderBreakdownScenario(Scenario):
@@ -38,46 +38,38 @@ class UltraSuiteDisorderBreakdownScenario(Scenario):
         - Audio files (e.g., .mp3)
         - A JSON file with annotations containing 'disorder_class' field
         """
-        print("Downloading SAA-Lab/SLPHelmManualLabels dataset...")
-        data_path = snapshot_download(
-            repo_id="SAA-Lab/SLPHelmManualLabels",
-            repo_type="dataset",
-            revision="38c2d7dab831acf8ccff0ca6f6463d6a8a0184ed",
-        )
+        audio_save_dir = os.path.join(output_path, "audio_files")
+        os.makedirs(audio_save_dir, exist_ok=True)
+
+        print("Downloading SAA-Lab/SLPHelmUltraSuitePlus dataset...")
+        dataset = load_dataset("SAA-Lab/SLPHelmUltraSuitePlus")

         instances: List[Instance] = []
         split: str = TEST_SPLIT

-        # Find all pairs of audio and JSON files
-        pairs = find_audio_json_pairs(data_path)
-        print(f"Num pairs: {len(pairs)}")
-
-        for audio_path, json_path in tqdm(pairs):
+        for idx, row in enumerate(tqdm(dataset["train"])):
             # Load the annotation
-            with open(json_path, "r") as f:
-                annotation = json.load(f)
+            label = row["disorder_type"]
+            transcription = row["transcription"]

-            # Get the correct answer and convert to label
-            if "disorder_type" not in annotation or "transcription" not in annotation:
-                continue
-            label = annotation["disorder_type"]
-            prompt = annotation["transcription"]
+            unique_id = str(idx)
+            local_audio_name = f"{label}_{unique_id}.mp3"
+            local_audio_path = os.path.join(audio_save_dir, local_audio_name)
+            ensure_audio_file_exists_from_array(local_audio_path, row["audio"]["array"], row["audio"]["sampling_rate"])

             # Create references for each option
             references: List[Reference] = []
-            correct_label = 0
-            for option in ["typically_developing", "articulation", "phonological"]:
+            options = ["typically_developing", "articulation", "phonological"]
+            if label not in options:
+                continue
+            for option in options:
                 reference = Reference(Output(text=option), tags=[CORRECT_TAG] if option == label else [])
                 references.append(reference)
-                if option == label:
-                    correct_label += 1
-            if correct_label == 0:
-                continue

             # Create the input with audio and instruction
             content = [
-                MediaObject(content_type="audio/mpeg", location=audio_path),
-                MediaObject(content_type="text/plain", text=self.get_instruction(prompt)),
+                MediaObject(content_type="audio/mpeg", location=local_audio_path),
+                MediaObject(content_type="text/plain", text=self.get_instruction(transcription)),
             ]

             input = Input(multimedia_content=MultimediaObject(content))

--- a/helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py
+++ b/helm/benchmark/scenarios/audio_language/ultra_suite_disorder_symptoms_scenario.py
@@ -1,7 +1,7 @@
-from typing import List, Tuple
+from typing import List
 import os
-import json

+from datasets import load_dataset
 from tqdm import tqdm

 from helm.benchmark.scenarios.scenario import (
@@ -14,38 +14,7 @@ from helm.benchmark.scenarios.scenario import (
     Output,
 )
 from helm.common.media_object import MediaObject, MultimediaObject
-from huggingface_hub import snapshot_download
-
-
-def find_audio_json_pairs(directory: str) -> List[Tuple[str, str]]:
-    """
-    Find all pairs of MP3 and JSON files in the given directory and its subdirectories.
-    Each pair consists of an MP3 file and its corresponding JSON file with the same base name.
-
-    Args:
-        directory: Path to the directory containing the files
-
-    Returns:
-        List of tuples where each tuple contains (mp3_path, json_path)
-    """
-    pairs = []
-
-    # Walk through all directories and subdirectories
-    for root, _, files in os.walk(directory):
-        # Get all MP3 files in current directory
-        mp3_files = [f for f in files if f.endswith(".mp3")]
-
-        for mp3_file in mp3_files:
-            base_name = os.path.splitext(mp3_file)[0]
-            json_file = f"{base_name}.json"
-
-            # Check if corresponding JSON file exists in the same directory
-            if json_file in files:
-                mp3_path = os.path.join(root, mp3_file)
-                json_path = os.path.join(root, json_file)
-                pairs.append((mp3_path, json_path))
-
-    return pairs
+from helm.common.audio_utils import ensure_audio_file_exists_from_array


 class UltraSuiteDisorderSymptomsScenario(Scenario):
@@ -70,45 +39,37 @@ class UltraSuiteDisorderSymptomsScenario(Scenario):
         - Audio files (e.g., .mp3)
         - A JSON file with annotations containing 'answer' field
         """
-        print("Downloading SAA-Lab/SLPHelmManualLabels dataset...")
-        data_path = snapshot_download(
-            repo_id="SAA-Lab/SLPHelmManualLabels",
-            repo_type="dataset",
-            revision="38c2d7dab831acf8ccff0ca6f6463d6a8a0184ed",
-        )
+        audio_save_dir = os.path.join(output_path, "audio_files")
+        os.makedirs(audio_save_dir, exist_ok=True)
+
+        print("Downloading SAA-Lab/SLPHelmUltraSuitePlus dataset...")
+        dataset = load_dataset("SAA-Lab/SLPHelmUltraSuitePlus")

         instances: List[Instance] = []
         split: str = TEST_SPLIT

-        # Find all pairs of audio and JSON files
-        pairs = find_audio_json_pairs(data_path)
-
-        for audio_path, json_path in tqdm(pairs):
+        for idx, row in enumerate(tqdm(dataset["train"])):
+            label = row["disorder_symptom"]
+            transcription = row["transcription"]

-            # Load the annotation
-            with open(json_path, "r") as f:
-                annotation = json.load(f)
+            unique_id = str(idx)
+            local_audio_name = f"{label}_{unique_id}.mp3"
+            local_audio_path = os.path.join(audio_save_dir, local_audio_name)
+            ensure_audio_file_exists_from_array(local_audio_path, row["audio"]["array"], row["audio"]["sampling_rate"])

-            # Get the correct answer and convert to label
-            if "disorder_symptom" not in annotation or "transcription" not in annotation:
-                continue
-            label = annotation["disorder_symptom"]
-            prompt = annotation["transcription"]
             # Create references for each option
             references: List[Reference] = []
-            correct_label = 0
-            for option in ["substitution", "omission", "addition", "typically_developing", "stuttering"]:
+            options = ["substitution", "omission", "addition", "typically_developing", "stuttering"]
+            if label not in options:
+                continue
+            for option in options:
                 reference = Reference(Output(text=option), tags=[CORRECT_TAG] if option == label else [])
                 references.append(reference)
-                if option == label:
-                    correct_label += 1
-            if correct_label == 0:
-                continue

             # Create the input with audio and instruction
             content = [
-                MediaObject(content_type="audio/mpeg", location=audio_path),
-                MediaObject(content_type="text/plain", text=self.get_instruction(prompt)),
+                MediaObject(content_type="audio/mpeg", location=local_audio_path),
+                MediaObject(content_type="text/plain", text=self.get_instruction(transcription)),
             ]

             input = Input(multimedia_content=MultimediaObject(content))

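Note: the three UltraSuite scenarios above now share the same loading pattern: read rows from the SAA-Lab/SLPHelmUltraSuitePlus dataset with datasets.load_dataset, write each in-memory audio array to a local .mp3 via helm.common.audio_utils.ensure_audio_file_exists_from_array, and point a MediaObject at that file. The minimal sketch below only reuses names that appear in the hunks above; the materialize_audio helper itself is illustrative and not part of the package.

# Sketch only: mirrors the shared loading pattern in the UltraSuite hunks above.
import os

from datasets import load_dataset

from helm.common.audio_utils import ensure_audio_file_exists_from_array


def materialize_audio(output_path: str, label_field: str = "disorder_class"):
    """Yield (label, transcription, local_audio_path) per row. Illustrative helper, not in the package."""
    audio_save_dir = os.path.join(output_path, "audio_files")
    os.makedirs(audio_save_dir, exist_ok=True)

    dataset = load_dataset("SAA-Lab/SLPHelmUltraSuitePlus")
    for idx, row in enumerate(dataset["train"]):
        label = row[label_field]
        # Write the in-memory audio array to disk so a MediaObject can reference a file path.
        local_audio_path = os.path.join(audio_save_dir, f"{label}_{idx}.mp3")
        ensure_audio_file_exists_from_array(
            local_audio_path, row["audio"]["array"], row["audio"]["sampling_rate"]
        )
        yield label, row["transcription"], local_audio_path
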
--- a/helm/benchmark/scenarios/babi_qa_scenario.py
+++ b/helm/benchmark/scenarios/babi_qa_scenario.py
@@ -1,6 +1,7 @@
 import os
 from typing import List

+from helm.benchmark.presentation.taxonomy_info import TaxonomyInfo
 from helm.common.general import ensure_file_downloaded, ensure_directory_exists
 from helm.benchmark.scenarios.scenario import (
     Scenario,
@@ -12,6 +13,7 @@ from helm.benchmark.scenarios.scenario import (
     CORRECT_TAG,
     PassageQuestionInput,
     Output,
+    ScenarioMetadata,
 )


@@ -139,3 +141,16 @@
                 story.append(fact)

         return instances
+
+    def get_metadata(self) -> ScenarioMetadata:
+        return ScenarioMetadata(
+            name="babi_qa",
+            display_name="bAbI",
+            description="The bAbI benchmark for measuring understanding and reasoning [(Weston et al., "
+            "2015)](https://arxiv.org/pdf/1502.05698.pdf).",
+            taxonomy=TaxonomyInfo(
+                task="question answering", what="reasoning", when="2015", who="synthetic", language="English"
+            ),
+            main_metric="quasi_exact_match",
+            main_split="test",
+        )

--- a/helm/benchmark/scenarios/banking77_scenario.py
+++ b/helm/benchmark/scenarios/banking77_scenario.py
@@ -2,6 +2,7 @@ import datasets
 import os
 from typing import List

+from helm.benchmark.presentation.taxonomy_info import TaxonomyInfo
 from helm.benchmark.scenarios.scenario import (
     CORRECT_TAG,
     TEST_SPLIT,
@@ -11,6 +12,7 @@ from helm.benchmark.scenarios.scenario import (
     Reference,
     Input,
     Output,
+    ScenarioMetadata,
 )
 from helm.common.general import ensure_directory_exists

@@ -54,3 +56,22 @@
             instance = Instance(input=input, references=references, split=split_name)
             instances.append(instance)
         return instances
+
+    def get_metadata(self) -> ScenarioMetadata:
+        return ScenarioMetadata(
+            name="banking77",
+            display_name="BANKING77",
+            short_display_name="BANKING77",
+            description="BANKING77 is a benchmark for intent classification of customer service queries "
+            "in the banking domain [(Casanueva et al., "
+            "2020)](https://aclanthology.org/2020.nlp4convai-1.5/).",
+            taxonomy=TaxonomyInfo(
+                task="text classification",
+                what="customer service queries in the banking domain",
+                when="During or before 2020",
+                who="banking customers",
+                language="English",
+            ),
+            main_metric="quasi_exact_match",
+            main_split="test",
+        )

--- a/helm/benchmark/scenarios/bbq_scenario.py
+++ b/helm/benchmark/scenarios/bbq_scenario.py
@@ -3,6 +3,7 @@ import os
 import random
 from typing import List, Dict, Tuple

+from helm.benchmark.presentation.taxonomy_info import TaxonomyInfo
 from helm.common.general import ensure_file_downloaded
 from helm.benchmark.scenarios.scenario import (
     Scenario,
@@ -14,6 +15,7 @@ from helm.benchmark.scenarios.scenario import (
     DEFAULT_TEST_SIZE,
     PassageQuestionInput,
     Output,
+    ScenarioMetadata,
 )

 AMBIGUOUS_TAG = "ambiguous"
@@ -237,3 +239,16 @@
             instances.append(instance)

         return instances
+
+    def get_metadata(self) -> ScenarioMetadata:
+        return ScenarioMetadata(
+            name="bbq",
+            display_name="BBQ (Bias Benchmark for Question Answering)",
+            short_display_name="BBQ",
+            description="The Bias Benchmark for Question Answering (BBQ) for measuring social bias in "
+            "question answering in ambiguous and unambigous context [(Parrish et al., "
+            "2022)](https://aclanthology.org/2022.findings-acl.165/).",
+            taxonomy=TaxonomyInfo(task="?", what="n/a", when="n/a", who="n/a", language="synthetic"),
+            main_metric="bbq_accuracy",
+            main_split="test",
+        )
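
Note: the babi_qa, banking77, and bbq hunks above all add the same get_metadata hook, returning a ScenarioMetadata built from a TaxonomyInfo. The sketch below shows the shape of that hook on a hypothetical scenario; the class and every field value are illustrative, and only the imports and keyword names are taken from the hunks above.

# Illustrative only: the shape of the get_metadata() hook added across scenarios in this release.
from typing import List

from helm.benchmark.presentation.taxonomy_info import TaxonomyInfo
from helm.benchmark.scenarios.scenario import Instance, Scenario, ScenarioMetadata


class ExampleScenario(Scenario):  # hypothetical scenario, not part of the package
    name = "example_scenario"
    description = "Placeholder scenario used to illustrate the metadata hook."
    tags = ["question_answering"]

    def get_instances(self, output_path: str) -> List[Instance]:
        return []  # instance construction omitted for brevity

    def get_metadata(self) -> ScenarioMetadata:
        # Keyword arguments mirror those used in the get_metadata additions above.
        return ScenarioMetadata(
            name="example_scenario",
            display_name="Example Scenario",
            description="One-line description, typically with a citation link.",
            taxonomy=TaxonomyInfo(
                task="question answering",
                what="illustrative subject matter",
                when="n/a",
                who="n/a",
                language="English",
            ),
            main_metric="quasi_exact_match",
            main_split="test",
        )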