dslighting-1.1.8.tar.gz → dslighting-1.3.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {dslighting-1.1.8 → dslighting-1.3.1}/PKG-INFO +1 -1
  2. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/__init__.py +21 -7
  3. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/agent.py +116 -23
  4. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/data_loader.py +14 -1
  5. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/utils/defaults.py +1 -1
  6. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/PKG-INFO +1 -1
  7. {dslighting-1.1.8 → dslighting-1.3.1}/pyproject.toml +1 -1
  8. {dslighting-1.1.8 → dslighting-1.3.1}/README.md +0 -0
  9. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/__init__.py +0 -0
  10. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/__init__.py +0 -0
  11. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/benchmark.py +0 -0
  12. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/datasci.py +0 -0
  13. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/mle.py +0 -0
  14. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/sciencebench.py +0 -0
  15. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/__init__.py +0 -0
  16. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/constants.py +0 -0
  17. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/exceptions.py +0 -0
  18. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/typing.py +0 -0
  19. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/config.py +0 -0
  20. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/__init__.py +0 -0
  21. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/candidates.py +0 -0
  22. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/formats.py +0 -0
  23. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/task.py +0 -0
  24. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/__init__.py +0 -0
  25. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/aflow_ops.py +0 -0
  26. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/autokaggle_ops.py +0 -0
  27. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/automind_ops.py +0 -0
  28. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/base.py +0 -0
  29. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/code.py +0 -0
  30. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/dsagent_ops.py +0 -0
  31. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/llm_basic.py +0 -0
  32. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/__init__.py +0 -0
  33. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/aflow_prompt.py +0 -0
  34. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/aide_prompt.py +0 -0
  35. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/autokaggle_prompt.py +0 -0
  36. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/automind_prompt.py +0 -0
  37. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/common.py +0 -0
  38. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/data_interpreter_prompt.py +0 -0
  39. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/dsagent_prompt.py +0 -0
  40. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/runner.py +0 -0
  41. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/__init__.py +0 -0
  42. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/data_analyzer.py +0 -0
  43. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/llm.py +0 -0
  44. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/llm_single.py +0 -0
  45. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/sandbox.py +0 -0
  46. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/__init__.py +0 -0
  47. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/autokaggle_state.py +0 -0
  48. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/base.py +0 -0
  49. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/dsa_log.py +0 -0
  50. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/experience.py +0 -0
  51. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/journal.py +0 -0
  52. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/operator_library.py +0 -0
  53. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/vdb.py +0 -0
  54. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/workspace.py +0 -0
  55. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/tasks/__init__.py +0 -0
  56. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/tasks/handlers.py +0 -0
  57. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/templates/open_ended/grade_template.py +0 -0
  58. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/tools/__init__.py +0 -0
  59. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/__init__.py +0 -0
  60. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/context.py +0 -0
  61. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/dynamic_import.py +0 -0
  62. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/parsing.py +0 -0
  63. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/__init__.py +0 -0
  64. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/base.py +0 -0
  65. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/factory.py +0 -0
  66. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/__init__.py +0 -0
  67. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/autokaggle_workflow.py +0 -0
  68. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/data_interpreter_workflow.py +0 -0
  69. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/deepanalyze_workflow.py +0 -0
  70. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/dsagent_workflow.py +0 -0
  71. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/__init__.py +0 -0
  72. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/aflow_workflow.py +0 -0
  73. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/aide_workflow.py +0 -0
  74. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/automind_workflow.py +0 -0
  75. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/templates/__init__.py +0 -0
  76. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/templates/basic_kaggle_loop.py +0 -0
  77. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/__init__.py +0 -0
  78. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/config_builder.py +0 -0
  79. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/task_detector.py +0 -0
  80. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/utils/__init__.py +0 -0
  81. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/SOURCES.txt +0 -0
  82. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/dependency_links.txt +0 -0
  83. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/requires.txt +0 -0
  84. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/top_level.txt +0 -0
  85. {dslighting-1.1.8 → dslighting-1.3.1}/setup.cfg +0 -0
  86. {dslighting-1.1.8 → dslighting-1.3.1}/tests/test_dslighting_api.py +0 -0
{dslighting-1.1.8 → dslighting-1.3.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dslighting
-Version: 1.1.8
+Version: 1.3.1
 Summary: Simplified API for Data Science Agent Automation
 Author: DSLighting Team
 License: AGPL-3.0
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/__init__.py
@@ -26,7 +26,7 @@ Advanced Usage:
 For more information, see: https://github.com/usail-hkust/dslighting
 """
 
-__version__ = "1.1.8"
+__version__ = "1.3.1"
 __author__ = "DSLighting Team"
 
 # Core API classes
@@ -60,27 +60,35 @@ def load_data(source, **kwargs):
     return loader.load(source, **kwargs)
 
 
-def run_agent(data, **kwargs):
+def run_agent(data=None, task_id=None, data_dir=None, **kwargs):
     """
     Quick one-liner: load data and run with defaults.
 
     This function creates an Agent with the specified parameters and runs it on the data.
 
     Args:
-        data: Data source (path, DataFrame, dict, etc.)
+        data: Optional data source (path, DataFrame, dict, etc.)
+        task_id: Task/Competition identifier (e.g., "bike-sharing-demand")
+        data_dir: Base data directory (default: "data/competitions")
         **kwargs: Parameters passed to Agent.__init__ and Agent.run
 
     Returns:
         AgentResult with output, metrics, and metadata
 
     Examples:
-        >>> # Simplest usage - all defaults
-        >>> result = dslighting.run_agent("data/titanic")
+        >>> # Recommended: using task_id
+        >>> result = dslighting.run_agent(
+        ...     task_id="bike-sharing-demand",
+        ...     data_dir="data/competitions"
+        ... )
         >>> print(f"Score: {result.score}, Cost: ${result.cost}")
 
+        >>> # Legacy: using data path
+        >>> result = dslighting.run_agent("data/titanic")
+
         >>> # With custom parameters
         >>> result = dslighting.run_agent(
-        ...     "data/titanic",
+        ...     task_id="bike-sharing-demand",
        ...     workflow="autokaggle",
        ...     model="gpt-4o"
        ... )
@@ -90,7 +98,7 @@ def run_agent(data, **kwargs):
     agent_params = {}
 
     # Parameters that should go to run(), not __init__
-    run_only_params = {'task_id', 'output_path', 'description'}
+    run_only_params = {'task_id', 'data_dir', 'output_path', 'description'}
 
     for key, value in kwargs.items():
         if key in run_only_params:
@@ -98,6 +106,12 @@ def run_agent(data, **kwargs):
         else:
             agent_params[key] = value
 
+    # Add explicit parameters to run_kwargs
+    if task_id is not None:
+        run_kwargs['task_id'] = task_id
+    if data_dir is not None:
+        run_kwargs['data_dir'] = data_dir
+
     # Create agent and run
     agent = Agent(**agent_params)
     return agent.run(data, **run_kwargs)
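The __init__.py changes above add a task_id / data_dir entry point to run_agent while keeping the old positional data argument. A minimal usage sketch based on the updated docstring; it assumes dslighting 1.3.1 is installed and that competition data already exists under data/competitions/bike-sharing-demand/:

import dslighting

# New in 1.3.1: pass task_id (and optionally data_dir) instead of a raw data path.
result = dslighting.run_agent(
    task_id="bike-sharing-demand",
    data_dir="data/competitions",
    workflow="autokaggle",   # forwarded to Agent.__init__ via **kwargs
    model="gpt-4o",          # forwarded to Agent.__init__ via **kwargs
)
print(f"Score: {result.score}, Cost: ${result.cost}")

# Legacy call style from 1.1.8 still works:
result = dslighting.run_agent("data/titanic")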
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/agent.py
@@ -161,8 +161,9 @@ class Agent:
 
     def run(
         self,
-        data: Union[str, Path, dict, pd.DataFrame, LoadedData],
+        data: Union[str, Path, dict, pd.DataFrame, LoadedData] = None,
         task_id: str = None,
+        data_dir: str = None,
         output_path: str = None,
         description: str = None,
         **kwargs
@@ -175,8 +176,12 @@ class Agent:
         result collection.
 
         Args:
-            data: Data source (path, DataFrame, dict, or LoadedData)
-            task_id: Optional task identifier
+            data: Optional data source (path, DataFrame, dict, or LoadedData).
+                  If not provided, use task_id + data_dir pattern.
+            task_id: Task/Competition identifier (e.g., "bike-sharing-demand").
+                     Required when using MLE benchmark format.
+            data_dir: Base data directory containing competition data.
+                      Default: "data/competitions"
             output_path: Custom output path for results
             description: Optional task description (overrides detected)
             **kwargs: Additional task parameters
@@ -185,22 +190,77 @@ class Agent:
             AgentResult with output, metrics, and metadata
 
         Examples:
-            >>> result = agent.run("data/titanic")
-            >>> print(f"Score: {result.score}, Cost: ${result.cost}")
+            >>> # Method 1: Recommended - using task_id + data_dir
+            >>> result = agent.run(
+            ...     task_id="bike-sharing-demand",
+            ...     data_dir="data/competitions"
+            ... )
+
+            >>> # Method 2: Using data path directly
+            >>> result = agent.run("path/to/competition")
 
+            >>> # Method 3: Using DataFrame
             >>> result = agent.run(df, description="Predict price")
-            >>> predictions = result.output
         """
         # Start timing
         start_time = time.time()
 
         try:
-            # Load data if not already loaded
-            if not isinstance(data, LoadedData):
+            # ========== New simplified API: task_id + data_dir ==========
+            if task_id:
+                # Set default data_dir if not provided
+                if data_dir is None:
+                    data_dir = "data/competitions"
+
+                self.logger.info(f"Using MLE benchmark format")
+                self.logger.info(f"  task_id: {task_id}")
+                self.logger.info(f"  data_dir: {data_dir}")
+
+                # Resolve paths
+                data_dir_path = Path(data_dir).resolve()
+                competition_dir = data_dir_path / task_id
+
+                # Check if task exists in benchmarks registry
+                benchmark_dir = self._get_default_benchmark_dir()
+                task_registry = benchmark_dir / task_id
+
+                if not task_registry.exists():
+                    self.logger.warning(
+                        f"Task '{task_id}' not found in benchmark registry: {benchmark_dir}"
+                    )
+                    self.logger.warning(
+                        f"This means the task cannot be auto-graded. "
+                        f"To enable grading, register the task at: {task_registry}"
+                    )
+                else:
+                    self.logger.info(f"  ✓ Task registered: {task_registry}")
+
+                # Check if data exists
+                if not competition_dir.exists():
+                    raise FileNotFoundError(
+                        f"Data directory not found: {competition_dir}\n"
+                        f"Please ensure data is prepared at: {competition_dir}/prepared/"
+                    )
+
+                self.logger.info(f"  Data directory: {competition_dir}")
+
+                # Load data
                 loader = DataLoader()
-                loaded_data = loader.load(data)
+                loaded_data = loader.load(competition_dir)
+
+            # ========== Legacy API: direct data path ==========
+            elif data is not None:
+                # Load data if not already loaded
+                if not isinstance(data, LoadedData):
+                    loader = DataLoader()
+                    loaded_data = loader.load(data)
+                else:
+                    loaded_data = data
             else:
-                loaded_data = data
+                raise ValueError(
+                    "Either 'task_id' or 'data' must be provided. "
+                    "Example: agent.run(task_id='bike-sharing-demand', data_dir='data/competitions')"
+                )
 
             # Get task information
             task_detection = loaded_data.task_detection
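The new task_id branch resolves {data_dir}/{task_id} and fails fast when that directory is missing, pointing at a prepared/ subdirectory. A hedged pre-flight sketch of what a caller can check before invoking agent.run; the layout beyond the error message and the prepared/public path used in the hunk below is an assumption:

from pathlib import Path

# Hypothetical pre-flight check mirroring the new task_id branch in Agent.run.
task_id = "bike-sharing-demand"
data_dir = Path("data/competitions").resolve()
competition_dir = data_dir / task_id

if not competition_dir.exists():
    raise SystemExit(f"Prepare data first at: {competition_dir}/prepared/")

public_dir = competition_dir / "prepared" / "public"
print("MLE prepared data found" if public_dir.exists() else "will fall back to data_dir")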
@@ -365,30 +425,33 @@ class Agent:
 
         if task_type == "kaggle":
             # MLE/Kaggle format: needs public_data_dir and output_submission_path
+            # Follow MLEBenchmark pattern: {data_dir}/prepared/public
             prepared_dir = data_dir / "prepared"
-            if prepared_dir.exists():
-                public_dir = prepared_dir / "public"
-                if public_dir.exists():
-                    payload["public_data_dir"] = str(public_dir)
-                else:
-                    # Fallback: use data_dir as public_data_dir
-                    payload["public_data_dir"] = str(data_dir)
+            public_dir = prepared_dir / "public"
+
+            # Check if prepared/public exists (MLE format)
+            if public_dir.exists():
+                payload["public_data_dir"] = str(public_dir.resolve())
+                self.logger.info(f"Using MLE prepared data: {public_dir.resolve()}")
             else:
-                # No prepared dir, use data_dir directly
-                payload["public_data_dir"] = str(data_dir)
+                # Fallback: use data_dir directly
+                payload["public_data_dir"] = str(data_dir.resolve())
+                self.logger.warning(
+                    f"Prepared data not found at {public_dir}, using data_dir instead"
+                )
 
-            # Set output path to workspace with unique ID (like MLEBenchmark does)
+            # Set output path - use simple filename, will be saved in workspace/sandbox
             if output_path is None:
                 # Extract competition_id from data_dir path if possible
                 competition_id = data_dir.name
                 unique_id = str(uuid.uuid4())[:8]
                 output_filename = f"submission_{competition_id}_{unique_id}.csv"
 
-                # Save to workspace directory (not sandbox, workspace is preserved)
-                workspace_dir = self._get_workspace_dir()
-                output_path = workspace_dir / output_filename
+                # Use just the filename - DSAT will save it in workspace/sandbox
+                output_path = Path(output_filename)
 
             payload["output_submission_path"] = str(output_path)
+            self.logger.info(f"Output submission file: {output_path}")
         else:
             # Other task types: use data_dir
             payload["data_dir"] = str(data_dir)
@@ -427,6 +490,36 @@ class Agent:
 
         return workspace_path
 
+    def _get_default_benchmark_dir(self) -> Path:
+        """
+        Get the default benchmark registry directory.
+
+        This is where task registration files (grade.py, description.md, etc.) are stored.
+        Default: benchmarks/mlebench/competitions/
+
+        Returns:
+            Path to benchmark registry directory
+        """
+        # Try to get from config
+        benchmark_dir = None
+
+        if hasattr(self, 'config') and hasattr(self.config, 'run'):
+            run_config = self.config.run
+            if hasattr(run_config, 'parameters') and run_config.parameters:
+                benchmark_dir = run_config.parameters.get('benchmark_dir')
+
+        # Fallback to default benchmark directory
+        if benchmark_dir is None:
+            # Use relative path from current working directory
+            # Default: benchmarks/mlebench/competitions/
+            benchmark_dir = "benchmarks/mlebench/competitions"
+
+        benchmark_path = Path(benchmark_dir).resolve()
+
+        self.logger.debug(f"Benchmark registry directory: {benchmark_path}")
+
+        return benchmark_path
+
     async def _execute_task(
         self,
         task: TaskDefinition,
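_get_default_benchmark_dir reads benchmark_dir from config.run.parameters when present and otherwise falls back to benchmarks/mlebench/competitions relative to the working directory. A small sketch of checking whether a task is registered there for auto-grading; the exact set of registration files beyond the grade.py and description.md named in the docstring is an assumption:

from pathlib import Path

task_id = "bike-sharing-demand"
registry = Path("benchmarks/mlebench/competitions").resolve()  # default fallback path
task_registry = registry / task_id

if task_registry.exists():
    print("Registered:", sorted(p.name for p in task_registry.iterdir()))
else:
    print(f"Not registered; grading disabled. Add e.g. {task_registry}/grade.py "
          f"and {task_registry}/description.md to enable it.")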
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/data_loader.py
@@ -269,10 +269,13 @@ class DataLoader:
             description = "MLE competition task"
 
         if isinstance(source, (str, Path)):
-            path = Path(source)
+            path = Path(source).resolve()  # Convert to absolute path
+            self.logger.info(f"Resolved path: {path}")
+
             if path.exists():
                 if path.is_dir():
                     data_dir = path
+                    self.logger.info(f"Data directory found: {data_dir}")
                     # Try to load description
                     desc_file = path / "description.md"
                     if desc_file.exists():
@@ -283,6 +286,7 @@ class DataLoader:
                         pass
                 elif path.is_file():
                     data_dir = path.parent
+                    self.logger.info(f"Data directory (from file parent): {data_dir}")
                     # Try to load description from parent directory
                     desc_file = path.parent / "description.md"
                     if desc_file.exists():
@@ -291,6 +295,15 @@ class DataLoader:
                             self.logger.info(f"Loaded description from {desc_file}")
                         except Exception:
                             pass
+            else:
+                self.logger.warning(f"Path does not exist: {path}")
+                # Still use the path even if it doesn't exist (might be created later)
+                if path.is_dir() or not path.suffix:
+                    data_dir = path
+                    self.logger.info(f"Using non-existent path as data directory: {data_dir}")
+                else:
+                    data_dir = path.parent
+                    self.logger.info(f"Using parent of non-existent file: {data_dir}")
 
         # Create MLE-style detection
         from dslighting.utils.defaults import WORKFLOW_RECOMMENDATIONS
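DataLoader now resolves the source to an absolute path and, when the path does not yet exist, guesses the data directory from the file suffix instead of failing. A short sketch of that heuristic in isolation:

from pathlib import Path

# Sketch of the new fallback for non-existent sources: a path with no file suffix
# is treated as the data directory itself, otherwise its parent directory is used.
for source in ["data/new-competition", "data/new-competition/train.csv"]:
    path = Path(source).resolve()
    if not path.exists():
        data_dir = path if (path.is_dir() or not path.suffix) else path.parent
        print(f"{source} -> {data_dir}")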
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/utils/defaults.py
@@ -105,7 +105,7 @@ DEFAULT_CONFIG: Dict[str, Any] = {
         "params": {}
     },
     "run": {
-        "name": "dslighting",  # Fixed name without UID to avoid UUID suffix
+        "name": "dsat_run",  # Use "dsat_run" to let DSATRunner auto-generate: dsat_run_{task_id}_{uid}
         "total_steps": DEFAULT_MAX_ITERATIONS,
         "keep_all_workspaces": DEFAULT_KEEP_ALL_WORKSPACES,
         "keep_workspace_on_failure": DEFAULT_KEEP_WORKSPACE_ON_FAILURE,
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dslighting
-Version: 1.1.8
+Version: 1.3.1
 Summary: Simplified API for Data Science Agent Automation
 Author: DSLighting Team
 License: AGPL-3.0
{dslighting-1.1.8 → dslighting-1.3.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dslighting"
-version = "1.1.8"
+version = "1.3.1"
 description = "Simplified API for Data Science Agent Automation"
 readme = "README.md"
 requires-python = ">=3.10"