dslighting 1.7.16-py3-none-any.whl → 1.7.20-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dslighting/__init__.py +1 -1
- dslighting/core/agent.py +27 -6
- dslighting/datasets/__init__.py +33 -24
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/METADATA +1 -1
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/RECORD +13 -13
- mlebench/grade.py +55 -3
- /dslighting/datasets/bike-sharing-demand/{test_answer.csv → prepared/private/test_answer.csv} +0 -0
- /dslighting/datasets/bike-sharing-demand/{sampleSubmission.csv → prepared/public/sampleSubmission.csv} +0 -0
- /dslighting/datasets/bike-sharing-demand/{test.csv → prepared/public/test.csv} +0 -0
- /dslighting/datasets/bike-sharing-demand/{train.csv → prepared/public/train.csv} +0 -0
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/WHEEL +0 -0
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/entry_points.txt +0 -0
- {dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/top_level.txt +0 -0
dslighting/__init__.py
CHANGED
dslighting/core/agent.py
CHANGED
@@ -417,16 +417,37 @@ class Agent:
 
         # Resolve paths relative to data_dir (from config.yaml)
         # self.data_dir is the parent directory (e.g., /path/to/competitions)
-        # config["dataset"]["answers"] is relative path like "bike-sharing-demand/prepared/private/test_answer.csv
+        # config["dataset"]["answers"] is relative path like "bike-sharing-demand/prepared/private/test_answer.csv
         answers_rel_path = config.get("dataset", {}).get("answers", "")
         answers_path = self.data_dir / answers_rel_path
 
+        # **MANDATORY**: Check for prepared/public and prepared/private structure
+        competition_dir = self.data_dir / self.task_id
+        prepared_public_dir = competition_dir / "prepared" / "public"
+        prepared_private_dir = competition_dir / "prepared" / "private"
+
+        if not prepared_public_dir.exists():
+            self.logger.error(f" ❌ Required directory not found: {prepared_public_dir}")
+            self.logger.error(f" ❌ Tasks must have prepared/public/ directory structure")
+            self.logger.error(f" See: https://github.com/usail-hkust/dslighting for setup instructions")
+            return 0.0
+
+        if not prepared_private_dir.exists():
+            self.logger.error(f" ❌ Required directory not found: {prepared_private_dir}")
+            self.logger.error(f" ❌ Tasks must have prepared/private/ directory structure")
+            self.logger.error(f" See: https://github.com/usail-hkust/dslighting for setup instructions")
+            return 0.0
+
+        self.logger.info(f" ✓ Required structure verified:")
+        self.logger.info(f" - prepared/public: {prepared_public_dir}")
+        self.logger.info(f" - prepared/private: {prepared_private_dir}")
+
         if not answers_path.exists():
             self.logger.warning(f" Answers file not found: {answers_path}")
             self.logger.warning(f" Looking for: {answers_path}")
             return 0.0
 
-        self.logger.info(f" Found answers file: {answers_path}")
+        self.logger.info(f" ✓ Found answers file: {answers_path}")
 
         # Import the actual Competition class from mlebench
         from mlebench.registry import Competition

@@ -466,13 +487,13 @@ class Agent:
         # Default RMSLE grader
         grader = Grader(name="rmsle", grade_fn=None)
 
-        # Resolve paths
-        competition_dir = self.data_dir / self.task_id
-        private_dir = competition_dir / "prepared" / "private"
-        public_dir = competition_dir / "prepared" / "public"
+        # Resolve paths - use actual prepared directories (already verified above)
         raw_dir = competition_dir / "raw"
         checksums = competition_dir / "checksums.txt"
         leaderboard = competition_dir / "leaderboard.csv"
+        # Use the actual prepared directories that we verified exist
+        private_dir = prepared_private_dir
+        public_dir = prepared_public_dir
 
         # Create placeholder prepare_fn
         def dummy_prepare_fn(a, b, c):
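For readers preparing their own tasks, the layout this new check enforces can be verified outside the agent as well. The sketch below is illustrative only (the helper name is not part of the package); it assumes a competitions root directory containing one folder per task id:

from pathlib import Path

def has_prepared_layout(data_dir: Path, task_id: str) -> bool:
    """Illustrative check mirroring the structure the agent now requires:
    <data_dir>/<task_id>/prepared/public and .../prepared/private must exist."""
    competition_dir = Path(data_dir) / task_id
    public_dir = competition_dir / "prepared" / "public"    # train.csv, test.csv, sampleSubmission.csv
    private_dir = competition_dir / "prepared" / "private"  # test_answer.csv
    return public_dir.is_dir() and private_dir.is_dir()

# Hypothetical usage:
# has_prepared_layout("/path/to/competitions", "bike-sharing-demand")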
dslighting/datasets/__init__.py
CHANGED
@@ -44,6 +44,8 @@ def load_bike_sharing_demand_raw() -> dict:
     This returns the raw data files as DataFrames in a dictionary.
     Use this if you want to access the data directly.
 
+    Note: Data files are in prepared/public and prepared/private structure.
+
     Returns:
         Dictionary with keys:
         - 'train': Training DataFrame

@@ -67,12 +69,16 @@ def load_bike_sharing_demand_raw() -> dict:
     )
 
     data = {}
-
-
-
+    # Load from prepared/public directory
+    public_dir = data_path / "prepared" / "public"
+    private_dir = data_path / "prepared" / "private"
+
+    data['train'] = pd.read_csv(public_dir / "train.csv")
+    data['test'] = pd.read_csv(public_dir / "test.csv")
+    data['sample_submission'] = pd.read_csv(public_dir / "sampleSubmission.csv")
 
-    # Load test_answer
-    test_answer_path =
+    # Load test_answer from prepared/private
+    test_answer_path = private_dir / "test_answer.csv"
     if test_answer_path.exists():
         data['test_answer'] = pd.read_csv(test_answer_path)
 

@@ -83,11 +89,11 @@ def load_bike_sharing_demand(data_dir: Optional[str] = None) -> dict:
     """
     Load the Bike Sharing Demand dataset as a ready-to-use competition.
 
-
-
+    The dataset already follows the MLE-Bench standard structure with
+    prepared/public and prepared/private directories.
 
     Args:
-        data_dir: Base data directory. If None, uses
+        data_dir: Base data directory. If None, uses the built-in dataset location.
 
     Returns:
         Dictionary with keys:

@@ -108,30 +114,33 @@ def load_bike_sharing_demand(data_dir: Optional[str] = None) -> dict:
     ... data_dir=str(info['data_dir'].parent)
     ... )
     """
-    # Load raw data
+    # Load raw data (from prepared/public and prepared/private structure)
    raw_data = load_bike_sharing_demand_raw()
 
-    #
+    # Determine data directory
    if data_dir is None:
-        #
-
+        # Use built-in dataset location
+        competition_dir = get_data_path() / "bike-sharing-demand"
    else:
+        # Create directory structure in specified location
        data_dir = Path(data_dir)
+        competition_dir = data_dir / "bike-sharing-demand"
 
-
-
-
-
+    # Create prepared/public and prepared/private structure
+    prepared_dir = competition_dir / "prepared" / "public"
+    prepared_dir.mkdir(parents=True, exist_ok=True)
+
+    # Copy data files
+    raw_data['train'].to_csv(prepared_dir / "train.csv", index=False)
+    raw_data['test'].to_csv(prepared_dir / "test.csv", index=False)
+    raw_data['sample_submission'].to_csv(prepared_dir / "sampleSubmission.csv", index=False)
 
-
-
-
-
+    # Create private directory with answers
+    private_dir = competition_dir / "prepared" / "private"
+    private_dir.mkdir(parents=True, exist_ok=True)
+    raw_data['test_answer'].to_csv(private_dir / "test_answer.csv", index=False)
 
-
-    private_dir = competition_dir / "prepared" / "private"
-    private_dir.mkdir(parents=True, exist_ok=True)
-    raw_data['test_answer'].to_csv(private_dir / "test_answer.csv", index=False)
+    prepared_dir = competition_dir / "prepared" / "public"
 
     return {
         'task_id': 'bike-sharing-demand',
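A minimal usage sketch of the two loaders changed above, assuming dslighting and pandas are installed; the target directory passed to load_bike_sharing_demand is hypothetical, and the dictionary keys follow the docstrings and return statement shown in the diff:

from dslighting.datasets import load_bike_sharing_demand, load_bike_sharing_demand_raw

# Raw access: DataFrames read from prepared/public (train/test/sampleSubmission)
# and, when present, prepared/private (test_answer).
raw = load_bike_sharing_demand_raw()
print(raw['train'].head())
print('test_answer' in raw)

# Ready-to-use competition: materializes the prepared/public and prepared/private
# structure under the given base directory (illustrative path).
info = load_bike_sharing_demand(data_dir="/tmp/dslighting-data")
print(info['task_id'])  # 'bike-sharing-demand'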
{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/RECORD
CHANGED

@@ -66,17 +66,17 @@ dsat/workflows/search/aide_workflow.py,sha256=mxIGXcueZGXpv1RXsQJ0YPWtvzICaFQeJo
 dsat/workflows/search/automind_workflow.py,sha256=b2JzqUDnDOt_SQdtAvC0fBCJzgTadLylbpgmpaS63Ls,12573
 dsat/workflows/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dsat/workflows/templates/basic_kaggle_loop.py,sha256=e6YLEpCArgWfKViwoti7SdygHsHp43sqP6VyMqnOJaA,3128
-dslighting/__init__.py,sha256=
+dslighting/__init__.py,sha256=joE_kvnmYWLrHWSePpyTJKwbLy641BO2xPKN_3P_qMA,5153
 dslighting/core/__init__.py,sha256=T4yYs0RQoz6DBarjOk12PeZq9YoPYrfl3Os0CPlzcB0,252
-dslighting/core/agent.py,sha256=
+dslighting/core/agent.py,sha256=40f4D33Jba4_GOhDbKT9GfyOmQOz7XjB9lIzLTqp_LE,40009
 dslighting/core/config_builder.py,sha256=JMDW0JFj6PRCeP70D1Td8FYXC1bIupe4H6m8J1caO7Y,10294
 dslighting/core/data_loader.py,sha256=HdcDN-10FYfViu8Z0RSYjA2ne6VXVbfKEOZO7EpxLIc,25003
 dslighting/core/task_detector.py,sha256=xOYAV9yiboC8lDeDIEtxvucaGi6fENfeycrowWs-kP0,16300
-dslighting/datasets/__init__.py,sha256=
-dslighting/datasets/bike-sharing-demand/
-dslighting/datasets/bike-sharing-demand/
-dslighting/datasets/bike-sharing-demand/
-dslighting/datasets/bike-sharing-demand/train.csv,sha256=SIaNazhqAdVtsRZmMzLdLOw8IDAs5H_FeLSHhwU2vdE,521358
+dslighting/datasets/__init__.py,sha256=Imn7lnXzrkDkNeL6xrSZziUC9Z7X_3uwvemTJEXeBqc,5812
+dslighting/datasets/bike-sharing-demand/prepared/private/test_answer.csv,sha256=A-tTMmqn6094FzXHn4bv73xurV5rZD4GKstCpVh8LSk,51199
+dslighting/datasets/bike-sharing-demand/prepared/public/sampleSubmission.csv,sha256=WMJ5URg7EEN9Z1LPW6xwiun4BJDZkcJP923ilvccsvI,142861
+dslighting/datasets/bike-sharing-demand/prepared/public/test.csv,sha256=-paigmJ0767Po8ANQlbRFpQlaZB0Xg_OwqEE6Fq1e-M,109461
+dslighting/datasets/bike-sharing-demand/prepared/public/train.csv,sha256=SIaNazhqAdVtsRZmMzLdLOw8IDAs5H_FeLSHhwU2vdE,521358
 dslighting/registry/README.md,sha256=SFAmvPqFyl2dm1mLd3-r94qW9DaIq84OZeQd8wIsw04,12208
 dslighting/registry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dslighting/registry/utils.py,sha256=cPkDpfTrO9RQD0As2YF64GuFjJyjMsYHzdc1v8sJ5go,9808

@@ -2105,7 +2105,7 @@ mlebench/README.md,sha256=tyV4Y8FWJ1ZhJYcEoNYhP2jcndJ9dTbQNyIV6Ubf7TU,3027
 mlebench/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 mlebench/cli.py,sha256=aJzQRGExprtwRGW5l2WVnmYST7ZiEXehxd6IOMf3pYg,7877
 mlebench/data.py,sha256=IUMRgf9zD-A4fQqH4jPR-4d2KPBiUsVM0I9f2FgubKQ,14462
-mlebench/grade.py,sha256=
+mlebench/grade.py,sha256=TAfw4-IsGHDtKwaYvti697UXiGH0yPkx5FHXJpwRDvk,10826
 mlebench/grade_helpers.py,sha256=ILRjLFBXnRylsY8bxpFns_RsNRwX52qQ90qdQs0hn7Q,9380
 mlebench/metrics.py,sha256=s0Om2rKXJ9hyQYWnh_G8gLAGwKBZxHM01VdP3ZaVH54,2494
 mlebench/registry.py,sha256=8ZLBULzX6p4DrRH3SsWfUIP_Cdk8E_olIJDpHlbkaWw,14335

@@ -2446,8 +2446,8 @@ mlebench/competitions/vinbigdata-chest-xray-abnormalities-detection/prepare_val.
 mlebench/competitions/whale-categorization-playground/grade.py,sha256=Wl1fNvpapAmc_Cdy2Cp68nEqeHB2XKnN91U9t-YLlXQ,1562
 mlebench/competitions/whale-categorization-playground/prepare.py,sha256=huo8fDBcGR413JF8m8Js8l8gkFCZpWL1st7Yd57Rjg8,4199
 mlebench/competitions/whale-categorization-playground/prepare_val.py,sha256=XvdnOvEJyTxovXQsoKLMKA_J6EMdxkpOe_SIgEdoOx8,7486
-dslighting-1.7.
-dslighting-1.7.
-dslighting-1.7.
-dslighting-1.7.
-dslighting-1.7.
+dslighting-1.7.20.dist-info/METADATA,sha256=zr0RDfx3rAYi-9fgMzscOXLkG4cp_AB_WcKkkmiySBQ,18326
+dslighting-1.7.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dslighting-1.7.20.dist-info/entry_points.txt,sha256=1xqIWL9-EG9t7nkERVeNRtMoRyIQe-6CJZJN-rdCYFQ,91
+dslighting-1.7.20.dist-info/top_level.txt,sha256=cmZU3ri0tz1IjiTKrz85Ih9614QlCrYqSqzlPMgaSM0,25
+dslighting-1.7.20.dist-info/RECORD,,
mlebench/grade.py
CHANGED
@@ -73,9 +73,61 @@ def grade_csv(path_to_submission: Path, competition: Competition) -> Competition
     )
 
     valid_submission = score is not None
-
-
-
+
+    # Helper function to determine is_lower_better from metric name
+    def infer_is_lower_better(metric_name: str) -> bool:
+        """
+        Infer whether lower is better based on metric name.
+        Returns True if lower is better (error metrics), False if higher is better (accuracy metrics).
+        """
+        metric_name_lower = metric_name.lower()
+
+        # Error/loss metrics (lower is better)
+        error_metrics = [
+            'rmse', 'rmsle', 'mae', 'mse', 'mape',
+            'log_loss', 'crossentropy', 'kld', 'error',
+            'loss', 'distance', 'deviation'
+        ]
+
+        # Accuracy/score metrics (higher is better)
+        accuracy_metrics = [
+            'accuracy', 'precision', 'recall', 'f1',
+            'auc', 'roc', 'score', 'r2', 'correlation',
+            'iou', 'dice', 'map', 'ndcg'
+        ]
+
+        for error_metric in error_metrics:
+            if error_metric in metric_name_lower:
+                return True
+
+        for accuracy_metric in accuracy_metrics:
+            if accuracy_metric in metric_name_lower:
+                return False
+
+        # Default: assume lower is better for safety
+        return True
+
+    # Try to get leaderboard, but make it optional
+    try:
+        competition_leaderboard = get_leaderboard(competition)
+        rank_info = competition.grader.rank_score(score, competition_leaderboard)
+        is_lower_better = competition.grader.is_lower_better(competition_leaderboard)
+    except (AssertionError, FileNotFoundError):
+        # Leaderboard not found, infer is_lower_better from grader name
+        logger.warning(f"Leaderboard not found for competition {competition.id}, skipping rank calculation")
+        competition_leaderboard = None
+        rank_info = {
+            "gold_threshold": None,
+            "silver_threshold": None,
+            "bronze_threshold": None,
+            "median_threshold": None,
+            "gold_medal": False,
+            "silver_medal": False,
+            "bronze_medal": False,
+            "above_median": False,
+        }
+        # Infer is_lower_better from grader name
+        is_lower_better = infer_is_lower_better(competition.grader.name)
 
     return CompetitionReport(
         competition_id=competition.id,
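To make the fallback concrete, here is a standalone restatement of the name-based heuristic added above, with a few illustrative inputs; it is a sketch for reference, not the packaged code (which defines the helper inside grade_csv):

def infer_is_lower_better(metric_name: str) -> bool:
    """Return True if the metric name looks like an error/loss metric (lower is better)."""
    name = metric_name.lower()
    error_metrics = ['rmse', 'rmsle', 'mae', 'mse', 'mape', 'log_loss',
                     'crossentropy', 'kld', 'error', 'loss', 'distance', 'deviation']
    accuracy_metrics = ['accuracy', 'precision', 'recall', 'f1', 'auc', 'roc',
                        'score', 'r2', 'correlation', 'iou', 'dice', 'map', 'ndcg']
    if any(m in name for m in error_metrics):       # error substrings are checked first
        return True
    if any(m in name for m in accuracy_metrics):
        return False
    return True  # conservative default when the name matches neither list

assert infer_is_lower_better("rmsle") is True          # error metric
assert infer_is_lower_better("roc_auc") is False       # accuracy-style metric
assert infer_is_lower_better("custom_metric") is True  # falls through to the default

Because matching is by substring and the error list is consulted first, composite names such as "log_loss_weighted" resolve as lower-is-better before the accuracy list is ever checked.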
/dslighting/datasets/bike-sharing-demand/{test_answer.csv → prepared/private/test_answer.csv}
RENAMED
File without changes

/dslighting/datasets/bike-sharing-demand/{sampleSubmission.csv → prepared/public/sampleSubmission.csv}
RENAMED
File without changes

/dslighting/datasets/bike-sharing-demand/{test.csv → prepared/public/test.csv}
RENAMED
File without changes

/dslighting/datasets/bike-sharing-demand/{train.csv → prepared/public/train.csv}
RENAMED
File without changes

{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/WHEEL
RENAMED
File without changes

{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/entry_points.txt
RENAMED
File without changes

{dslighting-1.7.16.dist-info → dslighting-1.7.20.dist-info}/top_level.txt
RENAMED
File without changes