dslighting-1.7.1-py3-none-any.whl → dslighting-1.7.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. dslighting/__init__.py +1 -1
  2. dslighting/core/agent.py +78 -62
  3. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/METADATA +1 -1
  4. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/RECORD +352 -7
  5. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/top_level.txt +1 -0
  6. mlebench/README.md +39 -0
  7. mlebench/__init__.py +0 -0
  8. mlebench/cli.py +221 -0
  9. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/grade.py +161 -0
  10. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/mAP_evaluation.py +425 -0
  11. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare.py +483 -0
  12. mlebench/competitions/3d-object-detection-for-autonomous-vehicles/prepare_val.py +719 -0
  13. mlebench/competitions/AI4Code/grade.py +70 -0
  14. mlebench/competitions/AI4Code/prepare.py +84 -0
  15. mlebench/competitions/AI4Code/prepare_val.py +159 -0
  16. mlebench/competitions/__init__.py +0 -0
  17. mlebench/competitions/aerial-cactus-identification/grade.py +11 -0
  18. mlebench/competitions/aerial-cactus-identification/prepare.py +71 -0
  19. mlebench/competitions/aerial-cactus-identification/prepare_val.py +133 -0
  20. mlebench/competitions/alaska2-image-steganalysis/grade.py +136 -0
  21. mlebench/competitions/alaska2-image-steganalysis/prepare.py +88 -0
  22. mlebench/competitions/alaska2-image-steganalysis/prepare_val.py +148 -0
  23. mlebench/competitions/aptos2019-blindness-detection/grade.py +35 -0
  24. mlebench/competitions/aptos2019-blindness-detection/prepare.py +75 -0
  25. mlebench/competitions/aptos2019-blindness-detection/prepare_val.py +123 -0
  26. mlebench/competitions/bike-sharing-demand/__init__.py +0 -0
  27. mlebench/competitions/bike-sharing-demand/grade.py +55 -0
  28. mlebench/competitions/bike-sharing-demand/prepare.py +37 -0
  29. mlebench/competitions/billion-word-imputation/grade.py +37 -0
  30. mlebench/competitions/billion-word-imputation/prepare.py +107 -0
  31. mlebench/competitions/billion-word-imputation/prepare_val.py +179 -0
  32. mlebench/competitions/bms-molecular-translation/grade.py +40 -0
  33. mlebench/competitions/bms-molecular-translation/prepare.py +68 -0
  34. mlebench/competitions/bms-molecular-translation/prepare_val.py +131 -0
  35. mlebench/competitions/cassava-leaf-disease-classification/grade.py +12 -0
  36. mlebench/competitions/cassava-leaf-disease-classification/prepare.py +113 -0
  37. mlebench/competitions/cassava-leaf-disease-classification/prepare_val.py +186 -0
  38. mlebench/competitions/cdiscount-image-classification-challenge/grade.py +11 -0
  39. mlebench/competitions/cdiscount-image-classification-challenge/prepare.py +144 -0
  40. mlebench/competitions/cdiscount-image-classification-challenge/prepare_val.py +205 -0
  41. mlebench/competitions/chaii-hindi-and-tamil-question-answering/grade.py +67 -0
  42. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare.py +31 -0
  43. mlebench/competitions/chaii-hindi-and-tamil-question-answering/prepare_val.py +94 -0
  44. mlebench/competitions/champs-scalar-coupling/grade.py +60 -0
  45. mlebench/competitions/champs-scalar-coupling/prepare.py +116 -0
  46. mlebench/competitions/champs-scalar-coupling/prepare_val.py +155 -0
  47. mlebench/competitions/conways-reverse-game-of-life-2020/__init__.py +0 -0
  48. mlebench/competitions/conways-reverse-game-of-life-2020/grade.py +40 -0
  49. mlebench/competitions/conways-reverse-game-of-life-2020/prepare.py +41 -0
  50. mlebench/competitions/demand-forecasting-kernels-only/__init__.py +0 -0
  51. mlebench/competitions/demand-forecasting-kernels-only/grade.py +66 -0
  52. mlebench/competitions/demand-forecasting-kernels-only/prepare.py +27 -0
  53. mlebench/competitions/demand_forecasting_kernels_only/__init__.py +0 -0
  54. mlebench/competitions/demand_forecasting_kernels_only/grade.py +66 -0
  55. mlebench/competitions/demand_forecasting_kernels_only/prepare.py +27 -0
  56. mlebench/competitions/denoising-dirty-documents/grade.py +44 -0
  57. mlebench/competitions/denoising-dirty-documents/prepare.py +134 -0
  58. mlebench/competitions/denoising-dirty-documents/prepare_val.py +178 -0
  59. mlebench/competitions/detecting-insults-in-social-commentary/grade.py +11 -0
  60. mlebench/competitions/detecting-insults-in-social-commentary/prepare.py +72 -0
  61. mlebench/competitions/detecting-insults-in-social-commentary/prepare_val.py +128 -0
  62. mlebench/competitions/dog-breed-identification/dogs.py +124 -0
  63. mlebench/competitions/dog-breed-identification/grade.py +42 -0
  64. mlebench/competitions/dog-breed-identification/prepare.py +55 -0
  65. mlebench/competitions/dog-breed-identification/prepare_val.py +104 -0
  66. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/grade.py +43 -0
  67. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare.py +70 -0
  68. mlebench/competitions/dogs-vs-cats-redux-kernels-edition/prepare_val.py +143 -0
  69. mlebench/competitions/ethanol-concentration/grade.py +23 -0
  70. mlebench/competitions/ethanol-concentration/prepare.py +90 -0
  71. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/grade.py +60 -0
  72. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare.py +41 -0
  73. mlebench/competitions/facebook-recruiting-iii-keyword-extraction/prepare_val.py +92 -0
  74. mlebench/competitions/feedback-prize-english-language-learning/__init__.py +0 -0
  75. mlebench/competitions/feedback-prize-english-language-learning/grade.py +60 -0
  76. mlebench/competitions/feedback-prize-english-language-learning/prepare.py +39 -0
  77. mlebench/competitions/freesound-audio-tagging-2019/grade.py +64 -0
  78. mlebench/competitions/freesound-audio-tagging-2019/prepare.py +94 -0
  79. mlebench/competitions/freesound-audio-tagging-2019/prepare_val.py +175 -0
  80. mlebench/competitions/freesound-audio-tagging-2019/vocabulary.py +83 -0
  81. mlebench/competitions/google-quest-challenge/classes.py +32 -0
  82. mlebench/competitions/google-quest-challenge/grade.py +45 -0
  83. mlebench/competitions/google-quest-challenge/prepare.py +58 -0
  84. mlebench/competitions/google-quest-challenge/prepare_val.py +120 -0
  85. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/grade.py +77 -0
  86. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare.py +155 -0
  87. mlebench/competitions/google-research-identify-contrails-reduce-global-warming/prepare_val.py +211 -0
  88. mlebench/competitions/h-and-m-personalized-fashion-recommendations/grade.py +42 -0
  89. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare.py +102 -0
  90. mlebench/competitions/h-and-m-personalized-fashion-recommendations/prepare_val.py +132 -0
  91. mlebench/competitions/handwriting/grade.py +23 -0
  92. mlebench/competitions/handwriting/prepare.py +179 -0
  93. mlebench/competitions/herbarium-2020-fgvc7/grade.py +34 -0
  94. mlebench/competitions/herbarium-2020-fgvc7/prepare.py +251 -0
  95. mlebench/competitions/herbarium-2020-fgvc7/prepare_val.py +242 -0
  96. mlebench/competitions/herbarium-2021-fgvc8/grade.py +34 -0
  97. mlebench/competitions/herbarium-2021-fgvc8/prepare.py +251 -0
  98. mlebench/competitions/herbarium-2021-fgvc8/prepare_val.py +222 -0
  99. mlebench/competitions/herbarium-2022-fgvc9/grade.py +31 -0
  100. mlebench/competitions/herbarium-2022-fgvc9/prepare.py +233 -0
  101. mlebench/competitions/herbarium-2022-fgvc9/prepare_val.py +213 -0
  102. mlebench/competitions/histopathologic-cancer-detection/grade.py +12 -0
  103. mlebench/competitions/histopathologic-cancer-detection/prepare.py +59 -0
  104. mlebench/competitions/histopathologic-cancer-detection/prepare_val.py +131 -0
  105. mlebench/competitions/hms-harmful-brain-activity-classification/constants.py +9 -0
  106. mlebench/competitions/hms-harmful-brain-activity-classification/grade.py +43 -0
  107. mlebench/competitions/hms-harmful-brain-activity-classification/kaggle_metric_utilities.py +96 -0
  108. mlebench/competitions/hms-harmful-brain-activity-classification/kullback_leibler_divergence.py +118 -0
  109. mlebench/competitions/hms-harmful-brain-activity-classification/prepare.py +121 -0
  110. mlebench/competitions/hms-harmful-brain-activity-classification/prepare_val.py +190 -0
  111. mlebench/competitions/hotel-id-2021-fgvc8/grade.py +41 -0
  112. mlebench/competitions/hotel-id-2021-fgvc8/prepare.py +63 -0
  113. mlebench/competitions/hotel-id-2021-fgvc8/prepare_val.py +132 -0
  114. mlebench/competitions/hubmap-kidney-segmentation/grade.py +62 -0
  115. mlebench/competitions/hubmap-kidney-segmentation/prepare.py +108 -0
  116. mlebench/competitions/hubmap-kidney-segmentation/prepare_val.py +153 -0
  117. mlebench/competitions/icecube-neutrinos-in-deep-ice/grade.py +111 -0
  118. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare.py +127 -0
  119. mlebench/competitions/icecube-neutrinos-in-deep-ice/prepare_val.py +183 -0
  120. mlebench/competitions/ili/grade.py +60 -0
  121. mlebench/competitions/ili/prepare.py +99 -0
  122. mlebench/competitions/imet-2020-fgvc7/grade.py +54 -0
  123. mlebench/competitions/imet-2020-fgvc7/prepare.py +77 -0
  124. mlebench/competitions/imet-2020-fgvc7/prepare_val.py +157 -0
  125. mlebench/competitions/inaturalist-2019-fgvc6/grade.py +35 -0
  126. mlebench/competitions/inaturalist-2019-fgvc6/prepare.py +259 -0
  127. mlebench/competitions/inaturalist-2019-fgvc6/prepare_val.py +304 -0
  128. mlebench/competitions/instant-gratification/__init__.py +0 -0
  129. mlebench/competitions/instant-gratification/grade.py +55 -0
  130. mlebench/competitions/instant-gratification/prepare.py +25 -0
  131. mlebench/competitions/instant_gratification/__init__.py +0 -0
  132. mlebench/competitions/instant_gratification/grade.py +55 -0
  133. mlebench/competitions/instant_gratification/prepare.py +25 -0
  134. mlebench/competitions/invasive-species-monitoring/grade.py +11 -0
  135. mlebench/competitions/invasive-species-monitoring/prepare.py +97 -0
  136. mlebench/competitions/invasive-species-monitoring/prepare_val.py +164 -0
  137. mlebench/competitions/iwildcam-2019-fgvc6/grade.py +44 -0
  138. mlebench/competitions/iwildcam-2019-fgvc6/prepare.py +118 -0
  139. mlebench/competitions/iwildcam-2019-fgvc6/prepare_val.py +194 -0
  140. mlebench/competitions/iwildcam-2020-fgvc7/grade.py +11 -0
  141. mlebench/competitions/iwildcam-2020-fgvc7/prepare.py +164 -0
  142. mlebench/competitions/iwildcam-2020-fgvc7/prepare_val.py +245 -0
  143. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/classes.py +1 -0
  144. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/grade.py +54 -0
  145. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare.py +42 -0
  146. mlebench/competitions/jigsaw-toxic-comment-classification-challenge/prepare_val.py +88 -0
  147. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/grade.py +153 -0
  148. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare.py +36 -0
  149. mlebench/competitions/jigsaw-unintended-bias-in-toxicity-classification/prepare_val.py +117 -0
  150. mlebench/competitions/kuzushiji-recognition/grade.py +58 -0
  151. mlebench/competitions/kuzushiji-recognition/kuzushiji_metric.py +118 -0
  152. mlebench/competitions/kuzushiji-recognition/prepare.py +92 -0
  153. mlebench/competitions/kuzushiji-recognition/prepare_val.py +149 -0
  154. mlebench/competitions/leaf-classification/classes.py +101 -0
  155. mlebench/competitions/leaf-classification/grade.py +44 -0
  156. mlebench/competitions/leaf-classification/prepare.py +60 -0
  157. mlebench/competitions/leaf-classification/prepare_val.py +116 -0
  158. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/grade.py +44 -0
  159. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare.py +51 -0
  160. mlebench/competitions/learning-agency-lab-automated-essay-scoring-2/prepare_val.py +96 -0
  161. mlebench/competitions/liverpool-ion-switching/__init__.py +0 -0
  162. mlebench/competitions/liverpool-ion-switching/grade.py +52 -0
  163. mlebench/competitions/liverpool-ion-switching/prepare.py +27 -0
  164. mlebench/competitions/liverpool_ion_switching/__init__.py +0 -0
  165. mlebench/competitions/liverpool_ion_switching/grade.py +52 -0
  166. mlebench/competitions/liverpool_ion_switching/prepare.py +27 -0
  167. mlebench/competitions/lmsys-chatbot-arena/grade.py +63 -0
  168. mlebench/competitions/lmsys-chatbot-arena/prepare.py +52 -0
  169. mlebench/competitions/lmsys-chatbot-arena/prepare_val.py +115 -0
  170. mlebench/competitions/mcm_2024_c_test/grade.py +107 -0
  171. mlebench/competitions/mcm_2024_c_test/prepare.py +2 -0
  172. mlebench/competitions/ml2021spring-hw2/grade.py +11 -0
  173. mlebench/competitions/ml2021spring-hw2/prepare.py +58 -0
  174. mlebench/competitions/ml2021spring-hw2/prepare_val.py +135 -0
  175. mlebench/competitions/mlsp-2013-birds/grade.py +11 -0
  176. mlebench/competitions/mlsp-2013-birds/prepare.py +182 -0
  177. mlebench/competitions/mlsp-2013-birds/prepare_val.py +241 -0
  178. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/grade.py +11 -0
  179. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare.py +58 -0
  180. mlebench/competitions/movie-review-sentiment-analysis-kernels-only/prepare_val.py +120 -0
  181. mlebench/competitions/multi-modal-gesture-recognition/grade.py +58 -0
  182. mlebench/competitions/multi-modal-gesture-recognition/prepare.py +85 -0
  183. mlebench/competitions/multi-modal-gesture-recognition/prepare_val.py +139 -0
  184. mlebench/competitions/my-custom-task-01/prepare.py +2 -0
  185. mlebench/competitions/new-my-task-01/prepare.py +2 -0
  186. mlebench/competitions/new-my-task-03/grade.py +107 -0
  187. mlebench/competitions/new-my-task-03/prepare.py +2 -0
  188. mlebench/competitions/new-york-city-taxi-fare-prediction/grade.py +28 -0
  189. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare.py +44 -0
  190. mlebench/competitions/new-york-city-taxi-fare-prediction/prepare_val.py +89 -0
  191. mlebench/competitions/nfl-player-contact-detection/grade.py +36 -0
  192. mlebench/competitions/nfl-player-contact-detection/prepare.py +101 -0
  193. mlebench/competitions/nfl-player-contact-detection/prepare_val.py +186 -0
  194. mlebench/competitions/nomad2018-predict-transparent-conductors/grade.py +47 -0
  195. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare.py +77 -0
  196. mlebench/competitions/nomad2018-predict-transparent-conductors/prepare_val.py +144 -0
  197. mlebench/competitions/osic-pulmonary-fibrosis-progression/grade.py +74 -0
  198. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare.py +95 -0
  199. mlebench/competitions/osic-pulmonary-fibrosis-progression/prepare_val.py +167 -0
  200. mlebench/competitions/paddy-disease-classification/grade.py +35 -0
  201. mlebench/competitions/paddy-disease-classification/prepare.py +69 -0
  202. mlebench/competitions/paddy-disease-classification/prepare_val.py +122 -0
  203. mlebench/competitions/petfinder-pawpularity-score/grade.py +41 -0
  204. mlebench/competitions/petfinder-pawpularity-score/prepare.py +76 -0
  205. mlebench/competitions/petfinder-pawpularity-score/prepare_val.py +154 -0
  206. mlebench/competitions/plant-pathology-2020-fgvc7/grade.py +41 -0
  207. mlebench/competitions/plant-pathology-2020-fgvc7/prepare.py +74 -0
  208. mlebench/competitions/plant-pathology-2020-fgvc7/prepare_val.py +160 -0
  209. mlebench/competitions/plant-pathology-2021-fgvc8/grade.py +54 -0
  210. mlebench/competitions/plant-pathology-2021-fgvc8/prepare.py +65 -0
  211. mlebench/competitions/plant-pathology-2021-fgvc8/prepare_val.py +130 -0
  212. mlebench/competitions/plant-seedlings-classification/grade.py +39 -0
  213. mlebench/competitions/plant-seedlings-classification/prepare.py +91 -0
  214. mlebench/competitions/plant-seedlings-classification/prepare_val.py +158 -0
  215. mlebench/competitions/playground-series-s3e1/__init__.py +0 -0
  216. mlebench/competitions/playground-series-s3e1/grade.py +52 -0
  217. mlebench/competitions/playground-series-s3e1/prepare.py +25 -0
  218. mlebench/competitions/playground-series-s3e11/__init__.py +0 -0
  219. mlebench/competitions/playground-series-s3e11/grade.py +55 -0
  220. mlebench/competitions/playground-series-s3e11/prepare.py +25 -0
  221. mlebench/competitions/playground-series-s3e18/grade.py +39 -0
  222. mlebench/competitions/playground-series-s3e18/prepare.py +36 -0
  223. mlebench/competitions/playground-series-s3e18/prepare_val.py +89 -0
  224. mlebench/competitions/playground_series_s3e1/__init__.py +0 -0
  225. mlebench/competitions/playground_series_s3e1/grade.py +52 -0
  226. mlebench/competitions/playground_series_s3e1/prepare.py +25 -0
  227. mlebench/competitions/playground_series_s3e11/__init__.py +0 -0
  228. mlebench/competitions/playground_series_s3e11/grade.py +55 -0
  229. mlebench/competitions/playground_series_s3e11/prepare.py +25 -0
  230. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/grade.py +44 -0
  231. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare.py +68 -0
  232. mlebench/competitions/predict-volcanic-eruptions-ingv-oe/prepare_val.py +146 -0
  233. mlebench/competitions/random-acts-of-pizza/grade.py +14 -0
  234. mlebench/competitions/random-acts-of-pizza/prepare.py +80 -0
  235. mlebench/competitions/random-acts-of-pizza/prepare_val.py +144 -0
  236. mlebench/competitions/ranzcr-clip-catheter-line-classification/classes.py +11 -0
  237. mlebench/competitions/ranzcr-clip-catheter-line-classification/grade.py +31 -0
  238. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare.py +53 -0
  239. mlebench/competitions/ranzcr-clip-catheter-line-classification/prepare_val.py +113 -0
  240. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/grade.py +124 -0
  241. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare.py +219 -0
  242. mlebench/competitions/rsna-2022-cervical-spine-fracture-detection/prepare_val.py +257 -0
  243. mlebench/competitions/rsna-breast-cancer-detection/grade.py +65 -0
  244. mlebench/competitions/rsna-breast-cancer-detection/prepare.py +141 -0
  245. mlebench/competitions/rsna-breast-cancer-detection/prepare_val.py +201 -0
  246. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/grade.py +13 -0
  247. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare.py +47 -0
  248. mlebench/competitions/rsna-miccai-brain-tumor-radiogenomic-classification/prepare_val.py +97 -0
  249. mlebench/competitions/santander-customer-satisfaction/grade.py +10 -0
  250. mlebench/competitions/santander-customer-satisfaction/prepare.py +41 -0
  251. mlebench/competitions/sciencebench-001-clintox-nn/__init__.py +0 -0
  252. mlebench/competitions/sciencebench-001-clintox-nn/grade.py +56 -0
  253. mlebench/competitions/sciencebench-001-clintox-nn/prepare.py +75 -0
  254. mlebench/competitions/sciencebench-015-aai/grade.py +37 -0
  255. mlebench/competitions/sciencebench-015-aai/prepare.py +102 -0
  256. mlebench/competitions/sciencebench-051-brain-blood-qsar/grade.py +58 -0
  257. mlebench/competitions/sciencebench-051-brain-blood-qsar/prepare.py +69 -0
  258. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/grade.py +55 -0
  259. mlebench/competitions/sciencebench-101-experimental-band-gap-prediction/prepare.py +88 -0
  260. mlebench/competitions/see-click-predict-fix/__init__.py +0 -0
  261. mlebench/competitions/see-click-predict-fix/grade.py +66 -0
  262. mlebench/competitions/see-click-predict-fix/prepare.py +25 -0
  263. mlebench/competitions/see_click_predict_fix/__init__.py +0 -0
  264. mlebench/competitions/see_click_predict_fix/grade.py +66 -0
  265. mlebench/competitions/see_click_predict_fix/prepare.py +25 -0
  266. mlebench/competitions/seti-breakthrough-listen/grade.py +11 -0
  267. mlebench/competitions/seti-breakthrough-listen/prepare.py +71 -0
  268. mlebench/competitions/seti-breakthrough-listen/prepare_val.py +159 -0
  269. mlebench/competitions/siim-covid19-detection/grade.py +194 -0
  270. mlebench/competitions/siim-covid19-detection/prepare.py +123 -0
  271. mlebench/competitions/siim-covid19-detection/prepare_val.py +164 -0
  272. mlebench/competitions/siim-isic-melanoma-classification/grade.py +11 -0
  273. mlebench/competitions/siim-isic-melanoma-classification/prepare.py +127 -0
  274. mlebench/competitions/siim-isic-melanoma-classification/prepare_val.py +158 -0
  275. mlebench/competitions/smartphone-decimeter-2022/grade.py +55 -0
  276. mlebench/competitions/smartphone-decimeter-2022/notebook.py +86 -0
  277. mlebench/competitions/smartphone-decimeter-2022/prepare.py +143 -0
  278. mlebench/competitions/smartphone-decimeter-2022/prepare_val.py +199 -0
  279. mlebench/competitions/spaceship-titanic/grade.py +11 -0
  280. mlebench/competitions/spaceship-titanic/prepare.py +23 -0
  281. mlebench/competitions/spaceship-titanic/prepare_val.py +61 -0
  282. mlebench/competitions/spooky-author-identification/classes.py +1 -0
  283. mlebench/competitions/spooky-author-identification/grade.py +38 -0
  284. mlebench/competitions/spooky-author-identification/prepare.py +40 -0
  285. mlebench/competitions/spooky-author-identification/prepare_val.py +78 -0
  286. mlebench/competitions/stanford-covid-vaccine/grade.py +65 -0
  287. mlebench/competitions/stanford-covid-vaccine/prepare.py +129 -0
  288. mlebench/competitions/stanford-covid-vaccine/prepare_val.py +199 -0
  289. mlebench/competitions/statoil-iceberg-classifier-challenge/grade.py +41 -0
  290. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare.py +105 -0
  291. mlebench/competitions/statoil-iceberg-classifier-challenge/prepare_val.py +157 -0
  292. mlebench/competitions/tabular-playground-series-dec-2021/grade.py +11 -0
  293. mlebench/competitions/tabular-playground-series-dec-2021/prepare.py +39 -0
  294. mlebench/competitions/tabular-playground-series-dec-2021/prepare_val.py +99 -0
  295. mlebench/competitions/tabular-playground-series-may-2022/grade.py +9 -0
  296. mlebench/competitions/tabular-playground-series-may-2022/prepare.py +56 -0
  297. mlebench/competitions/tabular-playground-series-may-2022/prepare_val.py +116 -0
  298. mlebench/competitions/tensorflow-speech-recognition-challenge/grade.py +11 -0
  299. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare.py +90 -0
  300. mlebench/competitions/tensorflow-speech-recognition-challenge/prepare_val.py +148 -0
  301. mlebench/competitions/tensorflow2-question-answering/grade.py +122 -0
  302. mlebench/competitions/tensorflow2-question-answering/prepare.py +122 -0
  303. mlebench/competitions/tensorflow2-question-answering/prepare_val.py +187 -0
  304. mlebench/competitions/text-normalization-challenge-english-language/grade.py +49 -0
  305. mlebench/competitions/text-normalization-challenge-english-language/prepare.py +115 -0
  306. mlebench/competitions/text-normalization-challenge-english-language/prepare_val.py +213 -0
  307. mlebench/competitions/text-normalization-challenge-russian-language/grade.py +49 -0
  308. mlebench/competitions/text-normalization-challenge-russian-language/prepare.py +113 -0
  309. mlebench/competitions/text-normalization-challenge-russian-language/prepare_val.py +165 -0
  310. mlebench/competitions/tgs-salt-identification-challenge/grade.py +144 -0
  311. mlebench/competitions/tgs-salt-identification-challenge/prepare.py +158 -0
  312. mlebench/competitions/tgs-salt-identification-challenge/prepare_val.py +166 -0
  313. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/grade.py +11 -0
  314. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare.py +95 -0
  315. mlebench/competitions/the-icml-2013-whale-challenge-right-whale-redux/prepare_val.py +141 -0
  316. mlebench/competitions/tmdb-box-office-prediction/__init__.py +0 -0
  317. mlebench/competitions/tmdb-box-office-prediction/grade.py +55 -0
  318. mlebench/competitions/tmdb-box-office-prediction/prepare.py +35 -0
  319. mlebench/competitions/tweet-sentiment-extraction/grade.py +67 -0
  320. mlebench/competitions/tweet-sentiment-extraction/prepare.py +36 -0
  321. mlebench/competitions/tweet-sentiment-extraction/prepare_val.py +106 -0
  322. mlebench/competitions/us-patent-phrase-to-phrase-matching/grade.py +31 -0
  323. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare.py +33 -0
  324. mlebench/competitions/us-patent-phrase-to-phrase-matching/prepare_val.py +71 -0
  325. mlebench/competitions/utils.py +266 -0
  326. mlebench/competitions/uw-madison-gi-tract-image-segmentation/grade.py +158 -0
  327. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare.py +139 -0
  328. mlebench/competitions/uw-madison-gi-tract-image-segmentation/prepare_val.py +193 -0
  329. mlebench/competitions/ventilator-pressure-prediction/__init__.py +0 -0
  330. mlebench/competitions/ventilator-pressure-prediction/grade.py +52 -0
  331. mlebench/competitions/ventilator-pressure-prediction/prepare.py +27 -0
  332. mlebench/competitions/ventilator-pressure-prediction/prepare_val.py +142 -0
  333. mlebench/competitions/ventilator_pressure_prediction/__init__.py +0 -0
  334. mlebench/competitions/ventilator_pressure_prediction/grade.py +52 -0
  335. mlebench/competitions/ventilator_pressure_prediction/prepare.py +27 -0
  336. mlebench/competitions/vesuvius-challenge-ink-detection/grade.py +97 -0
  337. mlebench/competitions/vesuvius-challenge-ink-detection/prepare.py +122 -0
  338. mlebench/competitions/vesuvius-challenge-ink-detection/prepare_val.py +170 -0
  339. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/grade.py +220 -0
  340. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare.py +129 -0
  341. mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.py +204 -0
  342. mlebench/competitions/whale-categorization-playground/grade.py +41 -0
  343. mlebench/competitions/whale-categorization-playground/prepare.py +103 -0
  344. mlebench/competitions/whale-categorization-playground/prepare_val.py +196 -0
  345. mlebench/data.py +420 -0
  346. mlebench/grade.py +209 -0
  347. mlebench/grade_helpers.py +235 -0
  348. mlebench/metrics.py +75 -0
  349. mlebench/registry.py +332 -0
  350. mlebench/utils.py +346 -0
  351. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/WHEEL +0 -0
  352. {dslighting-1.7.1.dist-info → dslighting-1.7.6.dist-info}/entry_points.txt +0 -0
mlebench/grade_helpers.py ADDED
@@ -0,0 +1,235 @@
+ """Helper classes related to grading"""
+ import inspect
+ from dataclasses import dataclass
+ from datetime import datetime
+ from typing import Any, Optional, Union
+
+ import pandas as pd
+
+ from mlebench.utils import get_logger, import_fn
+
+ logger = get_logger(__name__)
+
+
+ class Grader:
+     def __init__(self, name: str, grade_fn: str) -> None:
+         self.name = name
+         self.grade_fn = import_fn(grade_fn)
+         assert isinstance(self.name, str), "Grader name must be a string."
+         assert len(self.name) > 0, "Grader name cannot be empty."
+
+     def is_lower_better(self, leaderboard: pd.DataFrame) -> bool:
+         """
+         Determines if a lower score is better based on the leaderboard.
+         Returns True if lower scores are better, False otherwise.
+         """
+         scores = leaderboard["score"]
+         top_score = scores.iloc[0]
+         bottom_score = scores.iloc[-1]
+
+         return bool(top_score < bottom_score)
+
+     @staticmethod
+     def from_dict(data: dict) -> "Grader":
+         if "name" not in data:
+             data = data.copy()
+             data["name"] = "StandardGrader"
+         return Grader(**data)
+
+     def __call__(self, submission: pd.DataFrame, answers: Any) -> Optional[float]:
+         """
+         Runs the grading function on a submission, returning the score rounded to 5 decimal places.
+         """
+         try:
+             score = self.grade_fn(submission, answers)
+         except InvalidSubmissionError as e:
+             logger.warning(f"Invalid submission: {e}")
+             return None
+         except Exception as e:
+             try:
+                 fpath = inspect.getfile(self.grade_fn)
+                 line_number = inspect.getsourcelines(self.grade_fn)[1]
+                 fn_info = f"{fpath}:{line_number}"
+             except TypeError:
+                 fn_info = str(self.grade_fn)
+             logger.error(f"Unexpected error during grading: {e}. Check {fn_info}")
+             return None
+         rounded_score = round(score, 5)
+         return rounded_score
+
+     def rank_score(self, score: Optional[float], leaderboard: pd.DataFrame) -> dict:
+         """
+         Ranks a score based on the leaderboard.
+         Returns a dictionary with the following keys:
+         - gold_medal: bool
+         - silver_medal: bool
+         - bronze_medal: bool
+         - above_median: bool
+         - gold_threshold: float
+         - silver_threshold: float
+         - bronze_threshold: float
+         - median_threshold: float
+         """
+         assert "score" in leaderboard.columns, "Leaderboard must have a `score` column."
+
+         lower_is_better = self.is_lower_better(leaderboard)
+
+         num_teams = len(leaderboard)
+         scores = leaderboard["score"]
+
+         def get_score_at_position(position: int) -> float:
+             """
+             Returns the score at the given position in the leaderboard.
+             Raises an IndexError if the position is out of bounds.
+             """
+             if position - 1 >= len(scores) or position < 1:
+                 raise IndexError("Position out of bounds in the leaderboard.")
+             return scores.iloc[position - 1]
+
+         def get_thresholds(num_teams: int) -> tuple[float, float, float, float]:
+             """
+             Returns the thresholds for medals based on kaggle.com/progression
+             Returns a tuple of thresholds in the order of gold, silver, bronze, median
+             """
+             if 1 <= num_teams < 100:
+                 gold_threshold = get_score_at_position(max(1, int(num_teams * 0.1)))
+                 silver_threshold = get_score_at_position(max(1, int(num_teams * 0.2)))
+                 bronze_threshold = get_score_at_position(max(1, int(num_teams * 0.4)))
+             elif 100 <= num_teams < 250:
+                 gold_threshold = get_score_at_position(10)
+                 silver_threshold = get_score_at_position(max(1, int(num_teams * 0.2)))
+                 bronze_threshold = get_score_at_position(max(1, int(num_teams * 0.4)))
+             elif 250 <= num_teams < 1000:
+                 gold_threshold = get_score_at_position(10 + int(num_teams * 0.002))
+                 silver_threshold = get_score_at_position(50)
+                 bronze_threshold = get_score_at_position(100)
+             elif num_teams >= 1000:
+                 gold_threshold = get_score_at_position(10 + int(num_teams * 0.002))
+                 silver_threshold = get_score_at_position(max(1, int(num_teams * 0.05)))
+                 bronze_threshold = get_score_at_position(max(1, int(num_teams * 0.1)))
+             else:
+                 raise ValueError("Number of teams in leaderboard must be greater than 0.")
+
+             median_threshold = scores.median()
+
+             return (
+                 float(gold_threshold),
+                 float(silver_threshold),
+                 float(bronze_threshold),
+                 float(median_threshold),
+             )
+
+         gold_threshold, silver_threshold, bronze_threshold, median_threshold = get_thresholds(
+             num_teams
+         )
+
+         if score is None:
+             return {
+                 "gold_medal": False,
+                 "silver_medal": False,
+                 "bronze_medal": False,
+                 "above_median": False,
+                 "gold_threshold": gold_threshold,
+                 "silver_threshold": silver_threshold,
+                 "bronze_threshold": bronze_threshold,
+                 "median_threshold": median_threshold,
+             }
+
+         assert isinstance(
+             score, (float, int)
+         ), f"Expected `score` to be a `float` or `int` but got a {type(score)}."
+
+         gold_medal = score <= gold_threshold if lower_is_better else score >= gold_threshold
+         silver_medal = not gold_medal and (
+             score <= silver_threshold if lower_is_better else score >= silver_threshold
+         )
+         bronze_medal = (
+             not gold_medal
+             and not silver_medal
+             and (score <= bronze_threshold if lower_is_better else score >= bronze_threshold)
+         )
+         above_median = score < median_threshold if lower_is_better else score > median_threshold
+
+         return {
+             "gold_medal": gold_medal,
+             "silver_medal": silver_medal,
+             "bronze_medal": bronze_medal,
+             "above_median": above_median,
+             "gold_threshold": gold_threshold,
+             "silver_threshold": silver_threshold,
+             "bronze_threshold": bronze_threshold,
+             "median_threshold": median_threshold,
+         }
+
+
+ @dataclass(frozen=True)
+ class CompetitionReport:
+     competition_id: str
+     score: Optional[float]  # Changed from float | None to Optional[float]
+     gold_threshold: float
+     silver_threshold: float
+     bronze_threshold: float
+     median_threshold: float
+     any_medal: bool
+     gold_medal: bool
+     silver_medal: bool
+     bronze_medal: bool
+     above_median: bool
+     submission_exists: bool
+     valid_submission: bool
+     is_lower_better: bool
+     created_at: datetime
+     submission_path: str
+
+     def to_dict(self) -> dict:
+         # Convert all values to JSON-compatible types explicitly
+         return {
+             "competition_id": self.competition_id,
+             "score": float(self.score) if self.score is not None else None,
+             "gold_threshold": float(self.gold_threshold),
+             "silver_threshold": float(self.silver_threshold),
+             "bronze_threshold": float(self.bronze_threshold),
+             "median_threshold": float(self.median_threshold),
+             "any_medal": bool(self.any_medal),
+             "gold_medal": bool(self.gold_medal),
+             "silver_medal": bool(self.silver_medal),
+             "bronze_medal": bool(self.bronze_medal),
+             "above_median": bool(self.above_median),
+             "submission_exists": bool(self.submission_exists),
+             "valid_submission": bool(self.valid_submission),
+             "is_lower_better": bool(self.is_lower_better),
+             "created_at": self.created_at.isoformat(),  # Serialize datetime
+             "submission_path": self.submission_path,
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict) -> "CompetitionReport":
+         data = data.copy()  # Avoid accidentally mutating the original dictionary
+         typed_data = {
+             "competition_id": data["competition_id"],
+             "score": float(data["score"]) if data["score"] is not None else None,
+             "gold_threshold": float(data["gold_threshold"]),
+             "silver_threshold": float(data["silver_threshold"]),
+             "bronze_threshold": float(data["bronze_threshold"]),
+             "median_threshold": float(data["median_threshold"]),
+             "any_medal": bool(data["any_medal"]),
+             "gold_medal": bool(data["gold_medal"]),
+             "silver_medal": bool(data["silver_medal"]),
+             "bronze_medal": bool(data["bronze_medal"]),
+             "above_median": bool(data["above_median"]),
+             "submission_exists": bool(data["submission_exists"]),
+             "valid_submission": bool(data["valid_submission"]),
+             "is_lower_better": bool(data["is_lower_better"]),
+             "created_at": datetime.fromisoformat(data["created_at"]),
+             "submission_path": data["submission_path"],
+         }
+
+         return cls(**typed_data)
+
+
+ class InvalidSubmissionError(Exception):
+     """
+     A custom exception for when the agent submission cannot be graded.
+     """
+
+     pass
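
The medal logic in `rank_score` follows the position brackets from kaggle.com/progression. As a rough, self-contained sketch of just those brackets (illustrative only; it mirrors `get_thresholds` above rather than importing anything from mlebench):

    def medal_positions(num_teams: int) -> tuple[int, int, int]:
        # Returns the (gold, silver, bronze) leaderboard positions for a team count,
        # mirroring the brackets in Grader.rank_score above.
        if 1 <= num_teams < 100:
            return (max(1, int(num_teams * 0.1)),
                    max(1, int(num_teams * 0.2)),
                    max(1, int(num_teams * 0.4)))
        if 100 <= num_teams < 250:
            return (10, max(1, int(num_teams * 0.2)), max(1, int(num_teams * 0.4)))
        if 250 <= num_teams < 1000:
            return (10 + int(num_teams * 0.002), 50, 100)
        if num_teams >= 1000:
            return (10 + int(num_teams * 0.002),
                    max(1, int(num_teams * 0.05)),
                    max(1, int(num_teams * 0.1)))
        raise ValueError("Number of teams must be greater than 0.")

    print(medal_positions(500))  # (11, 50, 100): gold, silver, bronze cutoff positions

For a 500-team leaderboard, for example, gold requires finishing in the top 10 + int(500 * 0.002) = 11 places.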
mlebench/metrics.py ADDED
@@ -0,0 +1,75 @@
+ import numpy as np
+
+
+ def average_precision_at_k(actual: set, predicted: list, k: int):
+     """
+     Computes the average precision at k (AP@k).
+
+     This function computes the average precision at k between a predicted ranking and a ground truth
+     set.
+
+     Args:
+         actual : A set of elements that are to be predicted (order doesn't matter)
+         predicted : A list of predicted elements (order does matter, most relevant go first)
+         k : The maximum number of predicted elements
+
+     Adapted from: https://github.com/benhamner/Metrics/blob/master/Python/ml_metrics/average_precision.py
+     """
+     if len(predicted) > k:
+         predicted = predicted[:k]
+
+     score = 0.0
+     num_hits = 0.0
+
+     for i, p in enumerate(predicted):
+         if p in actual and p not in predicted[:i]:
+             num_hits += 1.0
+             score += num_hits / (i + 1.0)
+
+     if not actual:
+         return 0.0
+
+     return score / min(len(actual), k)
+
+
+ def mean_average_precision_at_k(actual: list[set], predicted: list[list], k: int):
+     """
+     Computes the MAP@k
+
+     Args:
+         actual : a list of sets of the elements that are to be predicted (order doesn't matter)
+         predicted : a list of lists of predicted elements (order does matter, most relevant go first)
+         k : The maximum number of predicted elements
+     """
+     return np.mean([average_precision_at_k(a, p, k) for a, p in zip(actual, predicted)])
+
+
+ def dice_coefficient(
+     predicted_mask: np.ndarray, true_mask: np.ndarray, both_empty_value: float = np.nan
+ ) -> float:
+     """
+     Computes the Dice coefficient between two binary masks (can be multi-dimensional)
+
+     Args:
+         predicted_mask: A binary numpy array indicating where the segmentation is predicted
+         true_mask: A binary numpy array indicating where the segmentation is
+         both_empty_value: The value to return when both masks are empty
+     """
+     assert (
+         predicted_mask.shape == true_mask.shape
+     ), f"Predicted mask shape {predicted_mask.shape} does not match true mask shape {true_mask.shape}"
+     # Check if both masks are empty
+     if np.sum(predicted_mask) == 0 and np.sum(true_mask) == 0:
+         return both_empty_value
+
+     # Calculate intersection and union
+     intersection = np.sum(predicted_mask * true_mask)
+     union = np.sum(predicted_mask) + np.sum(true_mask)
+
+     if union == 0:
+         return both_empty_value
+
+     # Calculate Dice coefficient
+     dice_coeff = 2 * intersection / union
+
+     return dice_coeff
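
To see how these metrics behave, here is a small worked example using the functions above (assuming the mlebench package from this wheel is importable):

    from mlebench.metrics import average_precision_at_k, mean_average_precision_at_k

    actual = {"a", "b", "d"}     # ground-truth set, order irrelevant
    predicted = ["a", "c", "b"]  # ranked predictions, most relevant first

    # Hits at ranks 1 and 3: (1/1 + 2/3) / min(3, 3) ≈ 0.5556
    print(average_precision_at_k(actual, predicted, k=3))

    # Mean over two queries: (1.0 + 0.0) / 2 = 0.5
    print(mean_average_precision_at_k([{"a"}, {"b"}], [["a"], ["x"]], k=1))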
mlebench/registry.py ADDED
@@ -0,0 +1,332 @@
+ from dataclasses import dataclass
+ import importlib
+ from pathlib import Path
+ from typing import Callable
+
+ from appdirs import user_cache_dir
+
+ from mlebench.grade_helpers import Grader
+ from mlebench.utils import get_logger, get_module_dir, get_repo_dir, import_fn, load_yaml
+
+ logger = get_logger(__name__)
+
+
+ DEFAULT_DATA_DIR = (Path(user_cache_dir()) / "mle-bench" / "data").resolve()
+
+
+ @dataclass(frozen=True)
+ class Competition:
+     id: str
+     name: str
+     description: str
+     grader: Grader
+     answers: Path
+     gold_submission: Path
+     sample_submission: Path
+     competition_type: str
+     prepare_fn: Callable[[Path, Path, Path], Path]
+     raw_dir: Path
+     private_dir: Path
+     public_dir: Path
+     checksums: Path
+     leaderboard: Path
+
+     def __post_init__(self):
+         assert isinstance(self.id, str), "Competition id must be a string."
+         assert isinstance(self.name, str), "Competition name must be a string."
+         assert isinstance(self.description, str), "Competition description must be a string."
+         assert isinstance(self.grader, Grader), "Competition grader must be of type Grader."
+         assert isinstance(self.answers, Path), "Competition answers must be a Path."
+         assert isinstance(self.gold_submission, Path), "Gold submission must be a Path."
+         assert isinstance(self.sample_submission, Path), "Sample submission must be a Path."
+         assert isinstance(self.competition_type, str), "Competition type must be a string."
+         assert isinstance(self.checksums, Path), "Checksums must be a Path."
+         assert isinstance(self.leaderboard, Path), "Leaderboard must be a Path."
+         assert len(self.id) > 0, "Competition id cannot be empty."
+         assert len(self.name) > 0, "Competition name cannot be empty."
+         assert len(self.description) > 0, "Competition description cannot be empty."
+         assert len(self.competition_type) > 0, "Competition type cannot be empty."
+
+     @staticmethod
+     def from_dict(data: dict) -> "Competition":
+         grader = Grader.from_dict(data["grader"])
+
+         try:
+             return Competition(
+                 id=data["id"],
+                 name=data["name"],
+                 description=data["description"],
+                 grader=grader,
+                 answers=data["answers"],
+                 sample_submission=data["sample_submission"],
+                 gold_submission=data["gold_submission"],
+                 competition_type=data["competition_type"],
+                 prepare_fn=data["prepare_fn"],
+                 raw_dir=data["raw_dir"],
+                 public_dir=data["public_dir"],
+                 private_dir=data["private_dir"],
+                 checksums=data["checksums"],
+                 leaderboard=data["leaderboard"],
+             )
+         except KeyError as e:
+             raise ValueError(f"Missing key {e} in competition config!")
+
+
+ class Registry:
+     def __init__(self, data_dir: Path = DEFAULT_DATA_DIR, registry_dir: Path = None):
+         self._data_dir = data_dir.resolve()
+         self._custom_registry_dir = registry_dir.resolve() if registry_dir else None
+         self.mode = 'test'
+
+     def _coerce_file_import(
+         self, fn_import_string: str, root_dir: Path, competition_id: str
+     ) -> str:
+         if fn_import_string.startswith("file:"):
+             return fn_import_string
+
+         module_name, fn_name = fn_import_string.split(":")
+         try:
+             importlib.import_module(module_name)
+             return fn_import_string
+         except ModuleNotFoundError as exc:
+             if exc.name != module_name and not module_name.startswith(f"{exc.name}."):
+                 raise
+
+             leaf = module_name.split(".")[-1]
+             file_module = root_dir / competition_id / f"{leaf}.py"
+             if not file_module.exists() and leaf.endswith("_val"):
+                 fallback = root_dir / competition_id / f"{leaf[:-4]}.py"
+                 if fallback.exists():
+                     file_module = fallback
+
+             if not file_module.exists():
+                 raise
+
+             return f"file:{file_module}:{fn_name}"
+
+     def _resolve_competition_root(self, competition_id: str) -> Path:
+         """
+         Resolve where a competition config lives.
+         - Prefer custom registry_dir if provided by user.
+         - Prefer top-level `dabench/` for DABench-prefixed tasks.
+         - Check `data_dir` for user-uploaded tasks.
+         - Fall back to the legacy `mlebench/competitions/` directory for everything else.
+         """
+         # Priority 1: Use custom registry_dir if provided
+         if self._custom_registry_dir:
+             if (self._custom_registry_dir / competition_id / "config.yaml").exists():
+                 return self._custom_registry_dir
+             # If custom registry_dir doesn't have config, still try to use it
+             # (user might have custom structure)
+             return self._custom_registry_dir
+
+         # Priority 2: DABench tasks
+         repo_dir = get_repo_dir()
+         dabench_root = repo_dir / "benchmarks" / "dabench" / "competitions"
+         if competition_id.startswith("dabench-") and (dabench_root / competition_id).exists():
+             return dabench_root
+
+         # Priority 3: Check if the competition is in the data directory (user uploaded)
+         if (self._data_dir / competition_id / "config.yaml").exists():
+             return self._data_dir
+
+         # Priority 4: Fall back to the legacy `mlebench/competitions/` directory
+         legacy_root = self.get_competitions_dir()
+         if (legacy_root / competition_id).exists():
+             return legacy_root
+         if (dabench_root / competition_id).exists():
+             return dabench_root
+         return legacy_root
+
+     def set_mode(self, mode: str = 'test'):
+         """Set the mode of the registry.
+
+         Args:
+             mode: The mode of the registry. Can be 'test', 'validation' or 'prepare'.
+         """
+         assert mode in ['test', 'validation', 'prepare'], "Mode must be in ['test', 'validation', 'prepare']."
+         self.mode = mode
+
+     def get_competition(self, competition_id: str) -> Competition:
+         """Fetch the competition from the registry."""
+
+         root_dir = self._resolve_competition_root(competition_id)
+         is_dabench = root_dir.name == "competitions" and root_dir.parent.name == "dabench"
+
+         config_path = root_dir / competition_id / "config.yaml"
+         config = load_yaml(config_path)
+
+         checksums_path = root_dir / competition_id / "checksums.yaml"
+         leaderboard_path = root_dir / competition_id / "leaderboard.csv"
+
+         # Resolve description file. DABench configs may still point to legacy paths.
+         if is_dabench:
+             description_path = root_dir / competition_id / "description.md"
+         else:
+             # Try to find description relative to competition dir first
+             candidate_desc = root_dir / competition_id / config["description"]
+             if candidate_desc.exists():
+                 description_path = candidate_desc
+             else:
+                 description_path = get_repo_dir() / config["description"]
+                 if not description_path.exists() and config["description"].startswith("mlebench/"):
+                     description_path = get_repo_dir() / "benchmarks" / config["description"]
+         description = description_path.read_text()
+
+         # Config for different modes
+         base_preparer = config["preparer"]
+         base_answers = config["dataset"]["answers"]
+         base_sample_submission = config["dataset"]["sample_submission"]
+
+         config_preparer = base_preparer
+         config_answers = base_answers
+         config_sample_submission = base_sample_submission
+         public_folder = 'public'
+         private_folder = 'private'
+
+         if is_dabench:
+             # DABench evaluation should NOT depend on any prepare logic; always use the
+             # existing prepared/public + prepared/private folders.
+             config_preparer = base_preparer
+             config_answers = base_answers
+             config_sample_submission = base_sample_submission
+             public_folder = "public"
+             private_folder = "private"
+         else:
+             if self.mode == 'prepare':
+                 config_preparer = config_preparer.replace('prepare:', 'prepare_val:')
+
+             elif self.mode == 'validation':
+                 config_preparer = config_preparer.replace('prepare:', 'prepare_val:')
+                 config_answers = config_answers.replace('/private/', '/private_val/')
+                 config_sample_submission = config_sample_submission.replace('/public/', '/public_val/')
+                 public_folder = 'public_val'
+                 private_folder = 'private_val'
+
+             # Some benchmarks may not provide *_val splits; if missing, fall back to test artifacts.
+             if self.mode == "validation":
+                 data_dir = self.get_data_dir()
+                 answers_candidate = data_dir / config_answers
+                 sample_candidate = data_dir / config_sample_submission
+                 public_candidate = data_dir / competition_id / "prepared" / public_folder
+                 private_candidate = data_dir / competition_id / "prepared" / private_folder
+                 if not (
+                     answers_candidate.exists()
+                     and sample_candidate.exists()
+                     and public_candidate.exists()
+                     and private_candidate.exists()
+                 ):
+                     config_preparer = base_preparer
+                     config_answers = base_answers
+                     config_sample_submission = base_sample_submission
+                     public_folder = "public"
+                     private_folder = "private"
+
+         # DABench competitions are not importable as Python packages (hyphenated ids).
+         # Convert legacy import strings to file-based imports under the resolved root_dir.
+         if is_dabench:
+             module_str, fn_name = config_preparer.split(":")
+             leaf = module_str.split(".")[-1]  # prepare or grade
+             file_module = root_dir / competition_id / f"{leaf}.py"
+             if not file_module.exists() and leaf.endswith("_val"):
+                 fallback = root_dir / competition_id / f"{leaf[:-4]}.py"
+                 if fallback.exists():
+                     file_module = fallback
+             config_preparer = f"file:{file_module}:{fn_name}"
+             if "grader" in config and "grade_fn" in config["grader"]:
+                 g_module_str, g_fn_name = config["grader"]["grade_fn"].split(":")
+                 g_leaf = g_module_str.split(".")[-1]
+                 g_file_module = root_dir / competition_id / f"{g_leaf}.py"
+                 if not g_file_module.exists() and g_leaf.endswith("_val"):
+                     g_fallback = root_dir / competition_id / f"{g_leaf[:-4]}.py"
+                     if g_fallback.exists():
+                         g_file_module = g_fallback
+                 config["grader"]["grade_fn"] = f"file:{g_file_module}:{g_fn_name}"
+         else:
+             config_preparer = self._coerce_file_import(
+                 config_preparer, root_dir, competition_id
+             )
+             if "grader" in config and "grade_fn" in config["grader"]:
+                 config["grader"]["grade_fn"] = self._coerce_file_import(
+                     config["grader"]["grade_fn"], root_dir, competition_id
+                 )
+
+         if is_dabench:
+             def preparer_fn(raw: Path, public: Path, private: Path) -> Path:
+                 logger.info(
+                     "DABench prepare disabled; using existing prepared/public + prepared/private."
+                 )
+                 return public
+         else:
+             preparer_fn = import_fn(config_preparer)
+
+         answers = self.get_data_dir() / config_answers
+         gold_submission = answers
+         if "gold_submission" in config["dataset"]:
+             gold_submission = self.get_data_dir() / config["dataset"]["gold_submission"]
+         sample_submission = self.get_data_dir() / config_sample_submission
+
+         raw_dir = self.get_data_dir() / competition_id / "raw"
+         private_dir = self.get_data_dir() / competition_id / "prepared" / private_folder
+         public_dir = self.get_data_dir() / competition_id / "prepared" / public_folder
+
+         return Competition.from_dict(
+             {
+                 **config,
+                 "description": description,
+                 "answers": answers,
+                 "sample_submission": sample_submission,
+                 "gold_submission": gold_submission,
+                 "prepare_fn": preparer_fn,
+                 "raw_dir": raw_dir,
+                 "private_dir": private_dir,
+                 "public_dir": public_dir,
+                 "checksums": checksums_path,
+                 "leaderboard": leaderboard_path,
+             }
+         )
+
+     def get_competitions_dir(self) -> Path:
+         """Retrieves the competitions directory within the registry."""
+
+         return get_module_dir() / "competitions"
+
+     def get_splits_dir(self) -> Path:
+         """Retrieves the splits directory within the repository."""
+
+         return get_repo_dir() / "experiments" / "splits"
+
+     def get_lite_competition_ids(self) -> list[str]:
+         """List all competition IDs for the lite version (low-complexity competitions)."""
+
+         lite_competitions_file = self.get_splits_dir() / "low.txt"
+         with open(lite_competitions_file, "r") as f:
+             competition_ids = f.read().splitlines()
+         return competition_ids
+
+     def get_data_dir(self) -> Path:
+         """Retrieves the data directory within the registry."""
+
+         return self._data_dir
+
+     def set_data_dir(self, new_data_dir: Path) -> "Registry":
+         """Returns a new Registry pointed at the given data directory."""
+
+         return Registry(new_data_dir)
+
+     def list_competition_ids(self) -> list[str]:
+         """List all competition IDs available in the registry, sorted alphabetically."""
+
+         repo_dir = get_repo_dir()
+         dabench_root = repo_dir / "dabench" / "competitions"
+
+         search_roots = [repo_dir / "benchmarks" / "mlebench" / "competitions"]
+         if dabench_root.exists():
+             search_roots.append(dabench_root)
+
+         competition_ids: set[str] = set()
+         for root in search_roots:
+             for cfg in root.rglob("config.yaml"):
+                 competition_ids.add(cfg.parent.stem)
+
+         return sorted(competition_ids)
+
+
+ registry = Registry()
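
A hypothetical end-to-end use of the registry above (the competition id is taken from the file list in this diff; the data directory is a placeholder, and `get_competition` expects the corresponding config and prepared data to already exist on disk):

    from pathlib import Path
    from mlebench.registry import registry

    # set_data_dir returns a fresh Registry rather than mutating in place
    my_registry = registry.set_data_dir(Path("/data/mlebench"))
    my_registry.set_mode("validation")  # or "test" / "prepare"

    competition = my_registry.get_competition("spaceship-titanic")
    print(competition.public_dir)   # .../prepared/public_val if *_val splits exist, else .../prepared/public
    print(competition.grader.name)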