dslighting-1.7.1-py3-none-any.whl → dslighting-1.7.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dslighting/__init__.py +1 -1
- dslighting/core/agent.py +78 -62
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
- mlebench/README.md +39 -0
- mlebench/__init__.py +0 -0
- mlebench/cli.py +221 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
- mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
- mlebench/competitions/AI4Code/grade.py +70 -0
- mlebench/competitions/AI4Code/prepare.py +84 -0
- mlebench/competitions/AI4Code/prepare_val.py +159 -0
- mlebench/competitions/__init__.py +0 -0
- mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
- mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
- mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
- mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
- mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
- mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
- mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
- mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
- mlebench/competitions/bike-sharing-demand/grade.py +55 -0
- mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
- mlebench/competitions/billion-word-imputation/grade.py +37 -0
- mlebench/competitions/billion-word-imputation/prepare.py +107 -0
- mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
- mlebench/competitions/bms-molecular-translation/grade.py +40 -0
- mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
- mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
- mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
- mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
- mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
- mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
- mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
- mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
- mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
- mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
- mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
- mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
- mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
- mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
- mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
- mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
- mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
- mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
- mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
- mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
- mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
- mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
- mlebench/competitions/dog-breed-identification/dogs.py +124 -0
- mlebench/competitions/dog-breed-identification/grade.py +42 -0
- mlebench/competitions/dog-breed-identification/prepare.py +55 -0
- mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
- mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
- mlebench/competitions/ethanol-concentration/grade.py +23 -0
- mlebench/competitions/ethanol-concentration/prepare.py +90 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
- mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
- mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
- mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
- mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
- mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
- mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
- mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
- mlebench/competitions/google-quest-challenge/classes.py +32 -0
- mlebench/competitions/google-quest-challenge/grade.py +45 -0
- mlebench/competitions/google-quest-challenge/prepare.py +58 -0
- mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
- mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
- mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
- mlebench/competitions/handwriting/grade.py +23 -0
- mlebench/competitions/handwriting/prepare.py +179 -0
- mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
- mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
- mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
- mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
- mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
- mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
- mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
- mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
- mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
- mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
- mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
- mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
- mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
- mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
- mlebench/competitions/ili/grade.py +60 -0
- mlebench/competitions/ili/prepare.py +99 -0
- mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
- mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
- mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
- mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
- mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
- mlebench/competitions/instant-gratification/__init__.py +0 -0
- mlebench/competitions/instant-gratification/grade.py +55 -0
- mlebench/competitions/instant-gratification/prepare.py +25 -0
- mlebench/competitions/instant_gratification/__init__.py +0 -0
- mlebench/competitions/instant_gratification/grade.py +55 -0
- mlebench/competitions/instant_gratification/prepare.py +25 -0
- mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
- mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
- mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
- mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
- mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
- mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
- mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
- mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
- mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
- mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
- mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
- mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
- mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
- mlebench/competitions/leaf-classification/classes.py +101 -0
- mlebench/competitions/leaf-classification/grade.py +44 -0
- mlebench/competitions/leaf-classification/prepare.py +60 -0
- mlebench/competitions/leaf-classification/prepare_val.py +116 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
- mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
- mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
- mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
- mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
- mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
- mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
- mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
- mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
- mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
- mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
- mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
- mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
- mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
- mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
- mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
- mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
- mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
- mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
- mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
- mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
- mlebench/competitions/my-custom-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-01/prepare.py +2 -0
- mlebench/competitions/new-my-task-03/grade.py +107 -0
- mlebench/competitions/new-my-task-03/prepare.py +2 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
- mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
- mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
- mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
- mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
- mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
- mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
- mlebench/competitions/paddy-disease-classification/grade.py +35 -0
- mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
- mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
- mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
- mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
- mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
- mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
- mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
- mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
- mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
- mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e1/grade.py +52 -0
- mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
- mlebench/competitions/playground-series-s3e11/grade.py +55 -0
- mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
- mlebench/competitions/playground-series-s3e18/grade.py +39 -0
- mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
- mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
- mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e1/grade.py +52 -0
- mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
- mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
- mlebench/competitions/playground_series_s3e11/grade.py +55 -0
- mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
- mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
- mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
- mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
- mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
- mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
- mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
- mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
- mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
- mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
- mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
- mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
- mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
- mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
- mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
- mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
- mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
- mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
- mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
- mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
- mlebench/competitions/see-click-predict-fix/grade.py +66 -0
- mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
- mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
- mlebench/competitions/see_click_predict_fix/grade.py +66 -0
- mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
- mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
- mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
- mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
- mlebench/competitions/siim-covid19-detection/grade.py +194 -0
- mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
- mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
- mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
- mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
- mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
- mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
- mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
- mlebench/competitions/spaceship-titanic/grade.py +11 -0
- mlebench/competitions/spaceship-titanic/prepare.py +23 -0
- mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
- mlebench/competitions/spooky-author-identification/classes.py +1 -0
- mlebench/competitions/spooky-author-identification/grade.py +38 -0
- mlebench/competitions/spooky-author-identification/prepare.py +40 -0
- mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
- mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
- mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
- mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
- mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
- mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
- mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
- mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
- mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
- mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
- mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
- mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
- mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
- mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
- mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
- mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
- mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
- mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
- mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
- mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
- mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
- mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
- mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
- mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
- mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
- mlebench/competitions/utils.py +266 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
- mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
- mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
- mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
- mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
- mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
- mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
- mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
- mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
- mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
- mlebench/competitions/whale-categorization-playground/grade.py +41 -0
- mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
- mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
- mlebench/data.py +420 -0
- mlebench/grade.py +209 -0
- mlebench/grade_helpers.py +235 -0
- mlebench/metrics.py +75 -0
- mlebench/registry.py +332 -0
- mlebench/utils.py +346 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
- {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
```diff
--- /dev/null
+++ b/mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py
@@ -0,0 +1,120 @@
+import shutil
+from pathlib import Path
+
+from pandas import DataFrame, read_csv
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import extract
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Prepares the dataset by performing two sequential splits.
+    1. Splits the raw data into a main train/test set for the `public` and
+       `private` directories.
+    2. Splits the main training set again to create a smaller train/validation
+       set for the `public_val` and `private_val` directories.
+    """
+
+    def _split_and_save(
+        data_to_split: DataFrame,
+        test_ratio: float,
+        public_dir: Path,
+        private_dir: Path,
+        random_state: int,
+    ) -> DataFrame:
+        """
+        Helper function to perform a data split, save files to specified
+        directories, and return the resulting training set for a potential
+        subsequent split.
+        """
+        # Ensure output directories exist
+        public_dir.mkdir(parents=True, exist_ok=True)
+        private_dir.mkdir(parents=True, exist_ok=True)
+
+        # Create train and test splits from the provided dataframe
+        new_train, answers = train_test_split(
+            data_to_split, test_size=test_ratio, random_state=random_state
+        )
+
+        # Create public test set (unlabeled)
+        new_test = answers.copy()
+        new_test = new_test.drop("Sentiment", axis="columns")
+
+        # Create sample submission
+        sample_submission = answers[["PhraseId", "Sentiment"]].copy()
+        sample_submission["Sentiment"] = 2
+
+        # Checks
+        assert new_train["PhraseId"].is_unique, f"PhraseId in new_train ({public_dir.name}) should be unique"
+        assert new_test["PhraseId"].is_unique, f"PhraseId in new_test ({public_dir.name}) should be unique"
+        assert set(new_train["PhraseId"]).isdisjoint(
+            set(new_test["PhraseId"])
+        ), f"PhraseId in new_train and new_test ({public_dir.name}) should be disjoint"
+        assert (
+            new_train.shape[0] + new_test.shape[0] == data_to_split.shape[0]
+        ), "New train and new test should together have the same number of rows as the input data"
+        assert (
+            new_train.columns.tolist() == data_to_split.columns.tolist()
+        ), "New train and input data should have the same columns"
+        assert new_test.columns.tolist() == [
+            "PhraseId",
+            "SentenceId",
+            "Phrase",
+        ], "new_test should have columns ['PhraseId', 'SentenceId', 'Phrase']"
+
+        # Write CSVs to their respective directories
+        answers.to_csv(private_dir / "answers.csv", index=False)
+        new_train.to_csv(public_dir / "train.tsv", index=False, sep="\t")
+        new_test.to_csv(public_dir / "test.tsv", index=False, sep="\t")
+        sample_submission.to_csv(public_dir / "sampleSubmission.csv", index=False)
+
+        # Zip files
+        shutil.make_archive(str(public_dir / "train.tsv"), "zip", public_dir, "train.tsv")
+        shutil.make_archive(str(public_dir / "test.tsv"), "zip", public_dir, "test.tsv")
+
+        # Delete unzipped files
+        (public_dir / "train.tsv").unlink()
+        (public_dir / "test.tsv").unlink()
+
+        return new_train
+
+    # --- Main script execution starts here ---
+
+    # Extract the raw data file
+    extract(raw / "train.tsv.zip", raw)
+    old_train = read_csv(raw / "train.tsv", sep="\t")
+
+    # Define the test ratio for the first split
+    test_ratio_1 = 0.3  # 66293/(156061+66293) = 0.3
+
+    # --- First Split: Create the original public/private sets ---
+    # This call produces the original, unmodified competition output.
+    # The resulting training set is captured for the second split.
+    train_for_val_split = _split_and_save(
+        data_to_split=old_train,
+        test_ratio=test_ratio_1,
+        public_dir=public,
+        private_dir=private,
+        random_state=0,
+    )
+
+    # --- Second Split: Create the new validation sets ---
+    # Define the new parallel directories for the validation split
+    public_val = public.parent / "public_val"
+    private_val = private.parent / "private_val"
+
+    # Calculate the test ratio for the second split. This ensures the new
+    # validation set (`test_val`) is the same size as the original test set.
+    # test_ratio_2 = (size of original test set) / (size of new training set)
+    #              = (total * test_ratio_1) / (total * (1 - test_ratio_1))
+    test_ratio_2 = test_ratio_1 / (1 - test_ratio_1)
+
+    # Perform the second split on the training data from the first split
+    _split_and_save(
+        data_to_split=train_for_val_split,
+        test_ratio=test_ratio_2,
+        public_dir=public_val,
+        private_dir=private_val,
+        random_state=0,
+    )
```
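A quick sanity check of the ratio arithmetic in the second split (an editor's sketch, not package code): with `test_ratio_1 = 0.3`, choosing `test_ratio_2 = test_ratio_1 / (1 - test_ratio_1)` makes the validation test set the same absolute size as the original test set.

```python
# Sketch only: verifies the comment above using the row counts it quotes.
total = 156_061 + 66_293          # approximate full train.tsv row count
r1 = 0.3
first_test = total * r1           # rows carved off for the original test set
remaining = total * (1 - r1)      # rows left for training after split 1
r2 = r1 / (1 - r1)                # the derived second-split ratio
second_test = remaining * r2      # == total * r1, so both test sets match
assert abs(first_test - second_test) < 1e-6
```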
```diff
--- /dev/null
+++ b/mlebench/competitions/multi-modal-gesture-recognition/grade.py
@@ -0,0 +1,58 @@
+from typing import List
+
+import numpy as np
+import pandas as pd
+from Levenshtein import distance
+
+from mlebench.grade_helpers import InvalidSubmissionError
+
+
+def final_edit_distance(y_true: List[List[int]], y_pred: List[List[int]]) -> float:
+    """
+    > The overall score we compute is the sum of the Levenshtein distances for all the lines of the
+    result file compared to the corresponding lines in the truth value file, divided by the total
+    number of gestures in the truth value file. This score is analogous to an error rate.
+    However, it can exceed one.
+    (https://www.kaggle.com/competitions/multi-modal-gesture-recognition/overview)
+    """
+    sum_of_distances = np.sum([distance(a, b) for a, b in zip(y_true, y_pred)])
+    total_num_gestures = sum(len(x) for x in y_true)
+    return sum_of_distances / total_num_gestures
+
+
+def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame):
+    if len(submission) != len(answers):
+        raise InvalidSubmissionError("Submission and answers must have the same length")
+
+    if "Id" not in submission.columns:
+        raise InvalidSubmissionError("Submission must have an 'Id' column")
+
+    if "Sequence" not in submission.columns:
+        raise InvalidSubmissionError("Submission must have a 'Sequence' column")
+
+    assert "Id" in answers.columns, "Answers must have 'Id' column"
+    assert "Sequence" in answers.columns, "Answers must have 'Sequence' column"
+
+    submission = submission.sort_values("Id")
+    answers = answers.sort_values("Id")
+
+    if (submission["Id"].values != answers["Id"].values).any():
+        raise InvalidSubmissionError("Submission and answers must have the same ids")
+
+    # Read as strings, convert to list of numbers
+    submission["Sequence"] = submission["Sequence"].astype(str)
+    answers["Sequence"] = answers["Sequence"].astype(str)
+    answers_sequences = [list(map(int, x.split())) for x in answers["Sequence"]]
+    try:
+        submission_sequences = [list(map(int, x.split())) for x in submission["Sequence"]]
+    except ValueError as e:
+        raise InvalidSubmissionError(
+            f"Submission sequences must be integers separated by spaces. Failed to convert Sequence to list of integers: {e}"
+        )
+
+    return answers_sequences, submission_sequences
+
+
+def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
+    y_true, y_pred = prepare_for_metric(submission, answers)
+    return final_edit_distance(y_true, y_pred)
```
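A toy usage sketch for the metric above (assumptions: `final_edit_distance` is imported from this grade module, and a recent `Levenshtein` package whose `distance()` accepts arbitrary sequences, not only strings):

```python
y_true = [[1, 2, 3], [4, 5]]   # ground-truth gesture sequences per line
y_pred = [[1, 2], [4, 5]]      # prediction misses one gesture on line 1
# Levenshtein distances per line: 1 + 0 = 1; total true gestures: 3 + 2 = 5
print(final_edit_distance(y_true, y_pred))  # 0.2
```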
```diff
--- /dev/null
+++ b/mlebench/competitions/multi-modal-gesture-recognition/prepare.py
@@ -0,0 +1,85 @@
+import random
+import shutil
+from pathlib import Path
+
+import pandas as pd
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Splits the data in raw into public and private datasets with appropriate test/train splits.
+
+    Raw dataset has:
+    - Train: training1, training2, training3, training4
+    - Val: validation1, validation2, validation3 (no labels)
+    - Test: (not available)
+
+    New prepared dataset has:
+    - Train: training1, training2, training3
+    - Val: validation1, validation2, validation3 (no labels)
+    - Test: training4 (renamed to `test.tar.gz`)
+
+    Furthermore,
+    - We modify the `training.csv` to remove training4 samples
+    - We modify the `test.csv` and `randomPredictions.csv` to include only training4 IDs
+
+    No other changes. We copy over the remaining files (devel01-40.7z, valid_all_files_combined.7z, sample_code_mmrgc.zip) as-is.
+    """
+
+    # Unzip the training4 file to get new test IDs
+    shutil.unpack_archive(raw / "training4.tar.gz", raw / "training4")
+    # training4 contains samples like "Sample00300.zip", the ID is the last 4 digits ("0300")
+    test_ids = sorted([fp.stem[-4:] for fp in (raw / "training4").glob("*.zip")])
+
+    # Update training.csv to remove training4 samples
+    training_df = pd.read_csv(raw / "training.csv", dtype={"Id": str, "Sequence": str})
+    new_training_df = training_df[~training_df["Id"].isin(test_ids)]
+    new_training_df.to_csv(public / "training.csv", index=False)
+    assert len(new_training_df) == len(training_df) - len(
+        test_ids
+    ), f"Expected {len(training_df) - len(test_ids)} samples in training.csv, but got {len(new_training_df)}"
+
+    # Make private answers
+    answers_df = training_df[training_df["Id"].isin(test_ids)]
+    answers_df.to_csv(private / "test.csv", index=False)
+    assert len(answers_df) == len(
+        test_ids
+    ), f"Expected {len(test_ids)} samples in private/test.csv, but got {len(answers_df)}"
+
+    # Make new public test.csv
+    test_df = pd.DataFrame({"Id": test_ids})
+    test_df.to_csv(public / "test.csv", index=False)
+    assert len(test_df) == len(
+        test_ids
+    ), f"Expected {len(test_ids)} samples in public/test.csv, but got {len(test_df)}"
+
+    # Make new public randomPredictions.csv
+    # predictions are random shufflings of numbers 1-20 (no repeats)
+    random.seed(0)
+    preds = []
+    for _ in range(len(test_ids)):
+        pred = " ".join(str(x) for x in random.sample(range(1, 21), 20))
+        preds.append(pred)
+    random_predictions_df = pd.DataFrame({"Id": test_ids, "Sequence": preds})
+    random_predictions_df.to_csv(public / "randomPredictions.csv", index=False)
+    assert len(random_predictions_df) == len(
+        test_ids
+    ), f"Expected {len(test_ids)} samples in public/randomPredictions.csv, but got {len(random_predictions_df)}"
+
+    # Copy over training4 as new test set
+    shutil.copyfile(src=raw / "training4.tar.gz", dst=public / "test.tar.gz")
+
+    # Copy over train and validation tars
+    for file in [
+        "training1.tar.gz",
+        "training2.tar.gz",
+        "training3.tar.gz",
+        "validation1.tar.gz",
+        "validation2.tar.gz",
+        "validation3.tar.gz",
+    ]:
+        shutil.copyfile(src=raw / file, dst=public / file)
+
+    # Copy over the rest of the files
+    for file in ["devel01-40.7z", "valid_all_files_combined.7z", "sample_code_mmrgc.zip"]:
+        shutil.copyfile(src=raw / file, dst=public / file)
```
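The ID convention the script relies on, shown in isolation (an editor's sketch; the sample name comes from the comment above):

```python
from pathlib import Path

fp = Path("Sample00300.zip")
assert fp.stem == "Sample00300"   # .stem drops the .zip suffix
assert fp.stem[-4:] == "0300"     # last four digits form the competition Id
```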
```diff
--- /dev/null
+++ b/mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py
@@ -0,0 +1,139 @@
+import random
+import shutil
+from pathlib import Path
+
+import pandas as pd
+
+
+def _create_split(
+    source_df: pd.DataFrame,
+    test_tar_name: str,
+    train_tar_names: list[str],
+    raw_dir: Path,
+    public_dir: Path,
+    private_dir: Path,
+) -> pd.DataFrame:
+    """
+    Helper function to perform a data split based on specified tar files.
+
+    It unpacks a given test tarball to identify test sample IDs, splits the
+    source dataframe into train/test sets, creates all necessary public and private
+    CSV files, copies the relevant data tarballs, and returns the newly created
+    training dataframe for potential subsequent splits.
+    """
+    # Unpack the test file to get test IDs
+    # The ID is the last 4 digits of the sample filename (e.g., "0300" from "Sample00300.zip")
+    test_data_dir_name = test_tar_name.replace(".tar.gz", "")
+    shutil.unpack_archive(raw_dir / test_tar_name, raw_dir / test_data_dir_name)
+    test_ids = sorted([fp.stem[-4:] for fp in (raw_dir / test_data_dir_name).glob("*.zip")])
+
+    # Create the new training dataframe for this split
+    new_training_df = source_df[~source_df["Id"].isin(test_ids)]
+    new_training_df.to_csv(public_dir / "training.csv", index=False)
+    assert len(new_training_df) == len(source_df) - len(test_ids)
+
+    # Make private answers
+    answers_df = source_df[source_df["Id"].isin(test_ids)]
+    answers_df.to_csv(private_dir / "test.csv", index=False)
+    assert len(answers_df) == len(test_ids)
+
+    # Make new public test.csv (IDs only)
+    test_df = pd.DataFrame({"Id": test_ids})
+    test_df.to_csv(public_dir / "test.csv", index=False)
+    assert len(test_df) == len(test_ids)
+
+    # Make new public randomPredictions.csv
+    # predictions are random shufflings of numbers 1-20 (no repeats)
+    random.seed(0)
+    preds = []
+    for _ in range(len(test_ids)):
+        pred = " ".join(str(x) for x in random.sample(range(1, 21), 20))
+        preds.append(pred)
+    random_predictions_df = pd.DataFrame({"Id": test_ids, "Sequence": preds})
+    random_predictions_df.to_csv(public_dir / "randomPredictions.csv", index=False)
+    assert len(random_predictions_df) == len(test_ids)
+
+    # Copy over the designated test set tarball
+    shutil.copyfile(src=raw_dir / test_tar_name, dst=public_dir / "test.tar.gz")
+
+    # Copy over the designated training tarballs for this split
+    for file in train_tar_names:
+        shutil.copyfile(src=raw_dir / file, dst=public_dir / file)
+
+    return new_training_df
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    """
+    Splits the data in raw into public and private datasets with appropriate test/train splits.
+
+    Raw dataset has:
+    - Train: training1, training2, training3, training4
+    - Val: validation1, validation2, validation3 (no labels)
+    - Test: (not available)
+
+    New prepared dataset has:
+    - Train: training1, training2, training3
+    - Val: validation1, validation2, validation3 (no labels)
+    - Test: training4 (renamed to `test.tar.gz`)
+
+    Furthermore,
+    - We modify the `training.csv` to remove training4 samples
+    - We modify the `test.csv` and `randomPredictions.csv` to include only training4 IDs
+
+    No other changes. We copy over the remaining files (devel01-40.7z, valid_all_files_combined.7z, sample_code_mmrgc.zip) as-is.
+    """
+    # --- Setup new directories for the validation split ---
+    public_val = public.parent / "public_val"
+    private_val = private.parent / "private_val"
+    public_val.mkdir(exist_ok=True)
+    private_val.mkdir(exist_ok=True)
+
+    # Load the complete training data manifest
+    full_training_df = pd.read_csv(raw / "training.csv", dtype={"Id": str, "Sequence": str})
+
+    # --- 1. Create the original public/private split ---
+    # This split is identical to the original script's behavior.
+    # Train set: training1, 2, 3. Test set: training4.
+    original_train_tars = ["training1.tar.gz", "training2.tar.gz", "training3.tar.gz"]
+    original_test_tar = "training4.tar.gz"
+
+    train_df_for_val_split = _create_split(
+        source_df=full_training_df,
+        test_tar_name=original_test_tar,
+        train_tar_names=original_train_tars,
+        raw_dir=raw,
+        public_dir=public,
+        private_dir=private,
+    )
+
+    # Copy over validation and other miscellaneous files to the 'public' directory
+    files_to_copy = [
+        "validation1.tar.gz",
+        "validation2.tar.gz",
+        "validation3.tar.gz",
+        "devel01-40.7z",
+        "valid_all_files_combined.7z",
+        "sample_code_mmrgc.zip",
+    ]
+    for file in files_to_copy:
+        shutil.copyfile(src=raw / file, dst=public / file)
+
+    # --- 2. Create the new public_val/private_val split ---
+    # This second split uses the training data from the first split as its source.
+    # New train set: training1, 2. New test (validation) set: training3.
+    val_train_tars = ["training1.tar.gz", "training2.tar.gz"]
+    val_test_tar = "training3.tar.gz"
+
+    _create_split(
+        source_df=train_df_for_val_split,
+        test_tar_name=val_test_tar,
+        train_tar_names=val_train_tars,
+        raw_dir=raw,
+        public_dir=public_val,
+        private_dir=private_val,
+    )
+
+    # Copy over validation and other files to 'public_val' to mirror the 'public' structure
+    for file in files_to_copy:
+        shutil.copyfile(src=raw / file, dst=public_val / file)
```
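A hypothetical driver for the `prepare()` above (the base path is an assumption, not part of the package), summarizing what `_create_split` leaves on disk:

```python
from pathlib import Path

base = Path("/data/multi-modal-gesture-recognition")  # assumed location
# prepare(base / "raw", base / "public", base / "private") would yield:
#   public/       training.csv minus training4 IDs; test.tar.gz = training4
#   private/      test.csv holding the training4 labels
#   public_val/   training.csv minus training3 and training4 IDs; test.tar.gz = training3
#   private_val/  test.csv holding the training3 labels
```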
```diff
@@ -0,0 +1,107 @@
+import os
+from pathlib import Path
+from typing import Any
+import pandas as pd
+
+# This is a generic LLM-based grader for open-ended tasks.
+# It reads 'rubric.md' from the task directory and evaluates the submission.
+
+try:
+    from dsat.services.llm import LLMService
+    from dsat.config import LLMConfig
+except ImportError:
+    # Fallback for when running outside of dsat package context
+    import sys
+    sys.path.append(str(Path(__file__).resolve().parent.parent.parent.parent))
+    from dsat.services.llm import LLMService
+    from dsat.config import LLMConfig
+
+class Report:
+    def __init__(self, score, feedback):
+        self.score = score
+        self.feedback = feedback
+        # Standard fields expected by the framework
+        self.is_lower_better = False
+        self.submission_exists = True
+        self.valid_submission = True
+        self.gold_medal = score >= 0.9
+        self.silver_medal = score >= 0.7
+        self.bronze_medal = score >= 0.5
+        self.above_median = score >= 0.5
+        self.submission_path = ""
+        self.competition_id = "open_ended_task"
+
+def grade(submission_path: Path, competition: Any) -> Report:
+    """
+    Grades the submission using an LLM Judge against rubric.md.
+    """
+    # 1. Load the Rubric
+    task_dir = competition.raw_dir.parent
+    rubric_path = task_dir / "rubric.md"
+
+    if not rubric_path.exists():
+        # Fallback if no rubric exists
+        print(f"Warning: Rubric not found at {rubric_path}. Returning default score.")
+        return Report(0.5, "No grading rubric defined.")
+
+    rubric_content = rubric_path.read_text(encoding="utf-8")
+
+    # 2. Load the Submission Content (Preview)
+    # Since it's open-ended, the 'submission_path' might be a CSV, code, or just a marker.
+    # We'll try to peek at the output artifacts if possible, or assume the agent's recent work
+    # is what we are grading. Ideally, AIDE produces a submission file.
+
+    submission_content = "No submission content readable."
+    if submission_path.exists():
+        try:
+            if submission_path.suffix == '.csv':
+                df = pd.read_csv(submission_path)
+                submission_content = f"CSV Submission Preview:\n{df.head().to_markdown()}"
+            else:
+                submission_content = submission_path.read_text(encoding="utf-8")[:2000]
+        except Exception as e:
+            submission_content = f"Error reading submission: {e}"
+
+    # 3. Setup LLM for Judging
+    # Note: In a real run, we might want to inject the API key securely.
+    # Here we assume environment variables are set (which they are in DSATRunner).
+    try:
+        api_key = os.getenv("API_KEY", "EMPTY")
+        base_url = os.getenv("API_BASE", "https://api.openai.com/v1")
+        model = os.getenv("LLM_MODEL", "gpt-4o")
+
+        llm = LLMService(LLMConfig(api_key=api_key, api_base=base_url, model=model))
+
+        prompt = f"""You are an impartial Judge. Evaluate the following submission against the provided Rubric.
+
+# RUBRIC
+{rubric_content}
+
+# SUBMISSION CONTENT
+{submission_content}
+
+# INSTRUCTION
+Assess the submission.
+Output ONLY a float number between 0.0 and 1.0 on the first line.
+On subsequent lines, provide brief feedback.
+"""
+        # Synchronous call wrapper or direct call if possible.
+        # Since grade() is synchronous in standard mlebench, we need a way to run async code.
+        import asyncio
+        response = asyncio.run(llm.achat([{"role": "user", "content": prompt}]))
+
+        lines = response.strip().split('\n')
+        try:
+            score = float(lines[0].strip())
+        except ValueError:
+            # Fallback if LLM is chatty
+            import re
+            match = re.search(r"(\d+(\.\d+)?)", lines[0])
+            score = float(match.group(1)) if match else 0.5
+
+        feedback = "\n".join(lines[1:])
+        return Report(score, feedback)
+
+    except Exception as e:
+        print(f"LLM Judging failed: {e}")
+        return Report(0.0, f"Judging failed: {e}")
```
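The grader's score-parsing fallback, isolated as a small sketch (`parse_score` is a hypothetical helper name, not part of the package):

```python
import re

def parse_score(first_line: str, default: float = 0.5) -> float:
    """Mirror of the try/except branch above: first try float(), then regex."""
    try:
        return float(first_line.strip())
    except ValueError:
        match = re.search(r"(\d+(\.\d+)?)", first_line)
        return float(match.group(1)) if match else default

assert parse_score("0.85") == 0.85
assert parse_score("Score: 0.7 - solid work") == 0.7  # chatty judge output
assert parse_score("no number here") == 0.5           # falls back to default
```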
```diff
--- /dev/null
+++ b/mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py
@@ -0,0 +1,28 @@
+from pandas import DataFrame
+from sklearn.metrics import root_mean_squared_error
+
+from mlebench.grade_helpers import InvalidSubmissionError
+
+
+def prepare_for_metric(submission: DataFrame, answers: DataFrame) -> tuple:
+
+    assert "fare_amount" in answers.columns, "Answers should have a fare_amount column"
+    assert "key" in answers.columns, "Answers should have a key column"
+    if "fare_amount" not in submission.columns:
+        raise InvalidSubmissionError("Submission should have a fare_amount column")
+    if "key" not in submission.columns:
+        raise InvalidSubmissionError("Submission should have a key column")
+
+    # Sort by 'key' to ensure alignment
+    submission = submission.sort_values("key")
+    answers = answers.sort_values("key")
+
+    y_true = answers["fare_amount"]
+    y_pred = submission["fare_amount"]
+
+    return y_true, y_pred
+
+
+def grade(submission: DataFrame, answers: DataFrame) -> float:
+    y_true, y_pred = prepare_for_metric(submission, answers)
+    return root_mean_squared_error(y_true, y_pred)
```
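A toy run of the grader above (a sketch; assumes `grade` is in scope and scikit-learn >= 1.4, where `root_mean_squared_error` was introduced):

```python
import pandas as pd

answers = pd.DataFrame({"key": ["b", "a"], "fare_amount": [10.0, 6.0]})
submission = pd.DataFrame({"key": ["a", "b"], "fare_amount": [5.0, 9.0]})

# prepare_for_metric sorts both frames by 'key', so submission row order is
# irrelevant: per-row errors are (5-6) and (9-10), giving RMSE = 1.0.
print(grade(submission, answers))  # 1.0
```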
```diff
--- /dev/null
+++ b/mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py
@@ -0,0 +1,44 @@
+import shutil
+from pathlib import Path
+
+from sklearn.model_selection import train_test_split
+
+from mlebench.utils import read_csv
+
+
+def prepare(raw: Path, public: Path, private: Path):
+    # Create train, test from train split
+    old_train = read_csv(raw / "train.csv")
+
+    # Train is c. 55M rows, original test is 9914 rows
+    new_train, new_test = train_test_split(old_train, test_size=9914, random_state=0)
+    new_test_without_labels = new_test.drop(columns=["fare_amount"])
+
+    # Create a sample submission file
+    submission_df = new_test.copy()[["key"]]
+    submission_df["fare_amount"] = 11.35
+
+    # Write CSVs
+    new_train.to_csv(public / "labels.csv", index=False)
+    new_test_without_labels.to_csv(public / "test.csv", index=False)
+    submission_df.to_csv(public / "sample_submission.csv", index=False)
+    new_test.to_csv(private / "test.csv", index=False)
+
+    # Copy over other files
+    shutil.copy(raw / "GCP-Coupons-Instructions.rtf", public / "GCP-Coupons-Instructions.rtf")
+
+    # Checks
+    assert set(new_train["key"]).isdisjoint(
+        set(new_test["key"])
+    ), "Train and test sets share samples!"
+    assert new_test.shape[1] == 8, f"Test set should have 8 columns, but has {new_test.shape[1]}"
+    assert (
+        new_test_without_labels.shape[1] == 7
+    ), f"Test set without labels should have 7 columns, but has {new_test_without_labels.shape[1]}"
+    assert new_train.shape[1] == 8, f"Train set should have 8 columns, but has {new_train.shape[1]}"
+    assert (
+        submission_df.shape[1] == 2
+    ), f"Sample submission should have 2 columns, but has {submission_df.shape[1]}"
+    assert (
+        submission_df.shape[0] == new_test.shape[0]
+    ), f"Sample submission should have {new_test.shape[0]} rows, but has {submission_df.shape[0]}"
```
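One detail worth flagging in the split above (an editor's sketch, not package code): `train_test_split` accepts an absolute row count for `test_size`, which is how the script carves exactly 9914 rows, the original Kaggle test size, out of the ~55M-row train set.

```python
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.DataFrame({"key": range(100), "fare_amount": 11.35})
# test_size as an int means "this many rows", not a fraction
train, test = train_test_split(df, test_size=25, random_state=0)
assert len(test) == 25 and len(train) == 75
```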