dslighting-1.1.8.tar.gz → dslighting-1.3.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. {dslighting-1.1.8 → dslighting-1.3.1}/PKG-INFO +1 -1
  2. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/__init__.py +21 -7
  3. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/agent.py +116 -23
  4. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/data_loader.py +14 -1
  5. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/utils/defaults.py +1 -1
  6. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/PKG-INFO +1 -1
  7. {dslighting-1.1.8 → dslighting-1.3.1}/pyproject.toml +1 -1
  8. {dslighting-1.1.8 → dslighting-1.3.1}/README.md +0 -0
  9. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/__init__.py +0 -0
  10. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/__init__.py +0 -0
  11. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/benchmark.py +0 -0
  12. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/datasci.py +0 -0
  13. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/mle.py +0 -0
  14. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/benchmark/sciencebench.py +0 -0
  15. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/__init__.py +0 -0
  16. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/constants.py +0 -0
  17. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/exceptions.py +0 -0
  18. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/common/typing.py +0 -0
  19. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/config.py +0 -0
  20. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/__init__.py +0 -0
  21. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/candidates.py +0 -0
  22. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/formats.py +0 -0
  23. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/models/task.py +0 -0
  24. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/__init__.py +0 -0
  25. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/aflow_ops.py +0 -0
  26. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/autokaggle_ops.py +0 -0
  27. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/automind_ops.py +0 -0
  28. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/base.py +0 -0
  29. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/code.py +0 -0
  30. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/dsagent_ops.py +0 -0
  31. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/operators/llm_basic.py +0 -0
  32. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/__init__.py +0 -0
  33. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/aflow_prompt.py +0 -0
  34. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/aide_prompt.py +0 -0
  35. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/autokaggle_prompt.py +0 -0
  36. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/automind_prompt.py +0 -0
  37. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/common.py +0 -0
  38. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/data_interpreter_prompt.py +0 -0
  39. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/prompts/dsagent_prompt.py +0 -0
  40. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/runner.py +0 -0
  41. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/__init__.py +0 -0
  42. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/data_analyzer.py +0 -0
  43. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/llm.py +0 -0
  44. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/llm_single.py +0 -0
  45. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/sandbox.py +0 -0
  46. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/__init__.py +0 -0
  47. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/autokaggle_state.py +0 -0
  48. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/base.py +0 -0
  49. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/dsa_log.py +0 -0
  50. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/experience.py +0 -0
  51. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/journal.py +0 -0
  52. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/states/operator_library.py +0 -0
  53. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/vdb.py +0 -0
  54. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/services/workspace.py +0 -0
  55. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/tasks/__init__.py +0 -0
  56. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/tasks/handlers.py +0 -0
  57. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/templates/open_ended/grade_template.py +0 -0
  58. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/tools/__init__.py +0 -0
  59. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/__init__.py +0 -0
  60. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/context.py +0 -0
  61. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/dynamic_import.py +0 -0
  62. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/utils/parsing.py +0 -0
  63. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/__init__.py +0 -0
  64. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/base.py +0 -0
  65. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/factory.py +0 -0
  66. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/__init__.py +0 -0
  67. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/autokaggle_workflow.py +0 -0
  68. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/data_interpreter_workflow.py +0 -0
  69. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/deepanalyze_workflow.py +0 -0
  70. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/manual/dsagent_workflow.py +0 -0
  71. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/__init__.py +0 -0
  72. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/aflow_workflow.py +0 -0
  73. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/aide_workflow.py +0 -0
  74. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/search/automind_workflow.py +0 -0
  75. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/templates/__init__.py +0 -0
  76. {dslighting-1.1.8 → dslighting-1.3.1}/dsat/workflows/templates/basic_kaggle_loop.py +0 -0
  77. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/__init__.py +0 -0
  78. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/config_builder.py +0 -0
  79. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/task_detector.py +0 -0
  80. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting/utils/__init__.py +0 -0
  81. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/SOURCES.txt +0 -0
  82. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/dependency_links.txt +0 -0
  83. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/requires.txt +0 -0
  84. {dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/top_level.txt +0 -0
  85. {dslighting-1.1.8 → dslighting-1.3.1}/setup.cfg +0 -0
  86. {dslighting-1.1.8 → dslighting-1.3.1}/tests/test_dslighting_api.py +0 -0
{dslighting-1.1.8 → dslighting-1.3.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dslighting
-Version: 1.1.8
+Version: 1.3.1
 Summary: Simplified API for Data Science Agent Automation
 Author: DSLighting Team
 License: AGPL-3.0
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/__init__.py
@@ -26,7 +26,7 @@ Advanced Usage:
 For more information, see: https://github.com/usail-hkust/dslighting
 """
 
-__version__ = "1.1.8"
+__version__ = "1.3.1"
 __author__ = "DSLighting Team"
 
 # Core API classes
@@ -60,27 +60,35 @@ def load_data(source, **kwargs):
     return loader.load(source, **kwargs)
 
 
-def run_agent(data, **kwargs):
+def run_agent(data=None, task_id=None, data_dir=None, **kwargs):
     """
     Quick one-liner: load data and run with defaults.
 
     This function creates an Agent with the specified parameters and runs it on the data.
 
     Args:
-        data: Data source (path, DataFrame, dict, etc.)
+        data: Optional data source (path, DataFrame, dict, etc.)
+        task_id: Task/Competition identifier (e.g., "bike-sharing-demand")
+        data_dir: Base data directory (default: "data/competitions")
         **kwargs: Parameters passed to Agent.__init__ and Agent.run
 
     Returns:
         AgentResult with output, metrics, and metadata
 
     Examples:
-        >>> # Simplest usage - all defaults
-        >>> result = dslighting.run_agent("data/titanic")
+        >>> # Recommended: using task_id
+        >>> result = dslighting.run_agent(
+        ...     task_id="bike-sharing-demand",
+        ...     data_dir="data/competitions"
+        ... )
         >>> print(f"Score: {result.score}, Cost: ${result.cost}")
 
+        >>> # Legacy: using data path
+        >>> result = dslighting.run_agent("data/titanic")
+
         >>> # With custom parameters
         >>> result = dslighting.run_agent(
-        ...     "data/titanic",
+        ...     task_id="bike-sharing-demand",
        ...     workflow="autokaggle",
        ...     model="gpt-4o"
        ... )
@@ -90,7 +98,7 @@ def run_agent(data, **kwargs):
     agent_params = {}
 
     # Parameters that should go to run(), not __init__
-    run_only_params = {'task_id', 'output_path', 'description'}
+    run_only_params = {'task_id', 'data_dir', 'output_path', 'description'}
 
     for key, value in kwargs.items():
         if key in run_only_params:
@@ -98,6 +106,12 @@ def run_agent(data, **kwargs):
         else:
             agent_params[key] = value
 
+    # Add explicit parameters to run_kwargs
+    if task_id is not None:
+        run_kwargs['task_id'] = task_id
+    if data_dir is not None:
+        run_kwargs['data_dir'] = data_dir
+
     # Create agent and run
     agent = Agent(**agent_params)
     return agent.run(data, **run_kwargs)
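The __init__.py changes above add a task_id / data_dir entry point to run_agent while keeping the old positional data argument. A minimal usage sketch based on the updated docstring; it assumes dslighting 1.3.1 is installed and that competition data already exists under data/competitions/bike-sharing-demand/:

import dslighting

# New in 1.3.1: pass task_id (and optionally data_dir) instead of a raw data path.
result = dslighting.run_agent(
    task_id="bike-sharing-demand",
    data_dir="data/competitions",
    workflow="autokaggle",   # forwarded to Agent.__init__ via **kwargs
    model="gpt-4o",          # forwarded to Agent.__init__ via **kwargs
)
print(f"Score: {result.score}, Cost: ${result.cost}")

# Legacy call style from 1.1.8 still works:
result = dslighting.run_agent("data/titanic")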
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/agent.py
@@ -161,8 +161,9 @@ class Agent:
 
     def run(
         self,
-        data: Union[str, Path, dict, pd.DataFrame, LoadedData],
+        data: Union[str, Path, dict, pd.DataFrame, LoadedData] = None,
         task_id: str = None,
+        data_dir: str = None,
         output_path: str = None,
         description: str = None,
         **kwargs
@@ -175,8 +176,12 @@ class Agent:
         result collection.
 
         Args:
-            data: Data source (path, DataFrame, dict, or LoadedData)
-            task_id: Optional task identifier
+            data: Optional data source (path, DataFrame, dict, or LoadedData).
+                  If not provided, use task_id + data_dir pattern.
+            task_id: Task/Competition identifier (e.g., "bike-sharing-demand").
+                     Required when using MLE benchmark format.
+            data_dir: Base data directory containing competition data.
+                      Default: "data/competitions"
             output_path: Custom output path for results
             description: Optional task description (overrides detected)
             **kwargs: Additional task parameters
@@ -185,22 +190,77 @@ class Agent:
             AgentResult with output, metrics, and metadata
 
         Examples:
-            >>> result = agent.run("data/titanic")
-            >>> print(f"Score: {result.score}, Cost: ${result.cost}")
+            >>> # Method 1: Recommended - using task_id + data_dir
+            >>> result = agent.run(
+            ...     task_id="bike-sharing-demand",
+            ...     data_dir="data/competitions"
+            ... )
+
+            >>> # Method 2: Using data path directly
+            >>> result = agent.run("path/to/competition")
 
+            >>> # Method 3: Using DataFrame
             >>> result = agent.run(df, description="Predict price")
-            >>> predictions = result.output
         """
         # Start timing
         start_time = time.time()
 
         try:
-            # Load data if not already loaded
-            if not isinstance(data, LoadedData):
+            # ========== New simplified API: task_id + data_dir ==========
+            if task_id:
+                # Set default data_dir if not provided
+                if data_dir is None:
+                    data_dir = "data/competitions"
+
+                self.logger.info(f"Using MLE benchmark format")
+                self.logger.info(f"  task_id: {task_id}")
+                self.logger.info(f"  data_dir: {data_dir}")
+
+                # Resolve paths
+                data_dir_path = Path(data_dir).resolve()
+                competition_dir = data_dir_path / task_id
+
+                # Check if task exists in benchmarks registry
+                benchmark_dir = self._get_default_benchmark_dir()
+                task_registry = benchmark_dir / task_id
+
+                if not task_registry.exists():
+                    self.logger.warning(
+                        f"Task '{task_id}' not found in benchmark registry: {benchmark_dir}"
+                    )
+                    self.logger.warning(
+                        f"This means the task cannot be auto-graded. "
+                        f"To enable grading, register the task at: {task_registry}"
+                    )
+                else:
+                    self.logger.info(f"  ✓ Task registered: {task_registry}")
+
+                # Check if data exists
+                if not competition_dir.exists():
+                    raise FileNotFoundError(
+                        f"Data directory not found: {competition_dir}\n"
+                        f"Please ensure data is prepared at: {competition_dir}/prepared/"
+                    )
+
+                self.logger.info(f"  Data directory: {competition_dir}")
+
+                # Load data
                 loader = DataLoader()
-                loaded_data = loader.load(data)
+                loaded_data = loader.load(competition_dir)
+
+            # ========== Legacy API: direct data path ==========
+            elif data is not None:
+                # Load data if not already loaded
+                if not isinstance(data, LoadedData):
+                    loader = DataLoader()
+                    loaded_data = loader.load(data)
+                else:
+                    loaded_data = data
             else:
-                loaded_data = data
+                raise ValueError(
+                    "Either 'task_id' or 'data' must be provided. "
+                    "Example: agent.run(task_id='bike-sharing-demand', data_dir='data/competitions')"
+                )
 
             # Get task information
             task_detection = loaded_data.task_detection
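The new task_id branch resolves {data_dir}/{task_id} and fails fast when that directory is missing, pointing at a prepared/ subdirectory. A hedged pre-flight sketch of what a caller can check before invoking agent.run; the layout beyond the error message and the prepared/public path used in the hunk below is an assumption:

from pathlib import Path

# Hypothetical pre-flight check mirroring the new task_id branch in Agent.run.
task_id = "bike-sharing-demand"
data_dir = Path("data/competitions").resolve()
competition_dir = data_dir / task_id

if not competition_dir.exists():
    raise SystemExit(f"Prepare data first at: {competition_dir}/prepared/")

public_dir = competition_dir / "prepared" / "public"
print("MLE prepared data found" if public_dir.exists() else "will fall back to data_dir")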
@@ -365,30 +425,33 @@ class Agent:
 
         if task_type == "kaggle":
             # MLE/Kaggle format: needs public_data_dir and output_submission_path
+            # Follow MLEBenchmark pattern: {data_dir}/prepared/public
             prepared_dir = data_dir / "prepared"
-            if prepared_dir.exists():
-                public_dir = prepared_dir / "public"
-                if public_dir.exists():
-                    payload["public_data_dir"] = str(public_dir)
-                else:
-                    # Fallback: use data_dir as public_data_dir
-                    payload["public_data_dir"] = str(data_dir)
+            public_dir = prepared_dir / "public"
+
+            # Check if prepared/public exists (MLE format)
+            if public_dir.exists():
+                payload["public_data_dir"] = str(public_dir.resolve())
+                self.logger.info(f"Using MLE prepared data: {public_dir.resolve()}")
             else:
-                # No prepared dir, use data_dir directly
-                payload["public_data_dir"] = str(data_dir)
+                # Fallback: use data_dir directly
+                payload["public_data_dir"] = str(data_dir.resolve())
+                self.logger.warning(
+                    f"Prepared data not found at {public_dir}, using data_dir instead"
+                )
 
-            # Set output path to workspace with unique ID (like MLEBenchmark does)
+            # Set output path - use simple filename, will be saved in workspace/sandbox
             if output_path is None:
                 # Extract competition_id from data_dir path if possible
                 competition_id = data_dir.name
                 unique_id = str(uuid.uuid4())[:8]
                 output_filename = f"submission_{competition_id}_{unique_id}.csv"
 
-                # Save to workspace directory (not sandbox, workspace is preserved)
-                workspace_dir = self._get_workspace_dir()
-                output_path = workspace_dir / output_filename
+                # Use just the filename - DSAT will save it in workspace/sandbox
+                output_path = Path(output_filename)
 
             payload["output_submission_path"] = str(output_path)
+            self.logger.info(f"Output submission file: {output_path}")
         else:
             # Other task types: use data_dir
             payload["data_dir"] = str(data_dir)
@@ -427,6 +490,36 @@ class Agent:
 
         return workspace_path
 
+    def _get_default_benchmark_dir(self) -> Path:
+        """
+        Get the default benchmark registry directory.
+
+        This is where task registration files (grade.py, description.md, etc.) are stored.
+        Default: benchmarks/mlebench/competitions/
+
+        Returns:
+            Path to benchmark registry directory
+        """
+        # Try to get from config
+        benchmark_dir = None
+
+        if hasattr(self, 'config') and hasattr(self.config, 'run'):
+            run_config = self.config.run
+            if hasattr(run_config, 'parameters') and run_config.parameters:
+                benchmark_dir = run_config.parameters.get('benchmark_dir')
+
+        # Fallback to default benchmark directory
+        if benchmark_dir is None:
+            # Use relative path from current working directory
+            # Default: benchmarks/mlebench/competitions/
+            benchmark_dir = "benchmarks/mlebench/competitions"
+
+        benchmark_path = Path(benchmark_dir).resolve()
+
+        self.logger.debug(f"Benchmark registry directory: {benchmark_path}")
+
+        return benchmark_path
+
     async def _execute_task(
         self,
         task: TaskDefinition,
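_get_default_benchmark_dir reads benchmark_dir from config.run.parameters when present and otherwise falls back to benchmarks/mlebench/competitions relative to the working directory. A small sketch of checking whether a task is registered there for auto-grading; the exact set of registration files beyond the grade.py and description.md named in the docstring is an assumption:

from pathlib import Path

task_id = "bike-sharing-demand"
registry = Path("benchmarks/mlebench/competitions").resolve()  # default fallback path
task_registry = registry / task_id

if task_registry.exists():
    print("Registered:", sorted(p.name for p in task_registry.iterdir()))
else:
    print(f"Not registered; grading disabled. Add e.g. {task_registry}/grade.py "
          f"and {task_registry}/description.md to enable it.")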
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/core/data_loader.py
@@ -269,10 +269,13 @@ class DataLoader:
             description = "MLE competition task"
 
         if isinstance(source, (str, Path)):
-            path = Path(source)
+            path = Path(source).resolve()  # Convert to absolute path
+            self.logger.info(f"Resolved path: {path}")
+
             if path.exists():
                 if path.is_dir():
                     data_dir = path
+                    self.logger.info(f"Data directory found: {data_dir}")
                     # Try to load description
                     desc_file = path / "description.md"
                     if desc_file.exists():
@@ -283,6 +286,7 @@ class DataLoader:
                         pass
                 elif path.is_file():
                     data_dir = path.parent
+                    self.logger.info(f"Data directory (from file parent): {data_dir}")
                     # Try to load description from parent directory
                     desc_file = path.parent / "description.md"
                     if desc_file.exists():
@@ -291,6 +295,15 @@ class DataLoader:
                             self.logger.info(f"Loaded description from {desc_file}")
                         except Exception:
                             pass
+            else:
+                self.logger.warning(f"Path does not exist: {path}")
+                # Still use the path even if it doesn't exist (might be created later)
+                if path.is_dir() or not path.suffix:
+                    data_dir = path
+                    self.logger.info(f"Using non-existent path as data directory: {data_dir}")
+                else:
+                    data_dir = path.parent
+                    self.logger.info(f"Using parent of non-existent file: {data_dir}")
 
         # Create MLE-style detection
         from dslighting.utils.defaults import WORKFLOW_RECOMMENDATIONS
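DataLoader now resolves the source to an absolute path and, when the path does not yet exist, guesses the data directory from the file suffix instead of failing. A short sketch of that heuristic in isolation:

from pathlib import Path

# Sketch of the new fallback for non-existent sources: a path with no file suffix
# is treated as the data directory itself, otherwise its parent directory is used.
for source in ["data/new-competition", "data/new-competition/train.csv"]:
    path = Path(source).resolve()
    if not path.exists():
        data_dir = path if (path.is_dir() or not path.suffix) else path.parent
        print(f"{source} -> {data_dir}")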
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting/utils/defaults.py
@@ -105,7 +105,7 @@ DEFAULT_CONFIG: Dict[str, Any] = {
         "params": {}
     },
     "run": {
-        "name": "dslighting",  # Fixed name without UID to avoid UUID suffix
+        "name": "dsat_run",  # Use "dsat_run" to let DSATRunner auto-generate: dsat_run_{task_id}_{uid}
         "total_steps": DEFAULT_MAX_ITERATIONS,
         "keep_all_workspaces": DEFAULT_KEEP_ALL_WORKSPACES,
         "keep_workspace_on_failure": DEFAULT_KEEP_WORKSPACE_ON_FAILURE,
{dslighting-1.1.8 → dslighting-1.3.1}/dslighting.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dslighting
-Version: 1.1.8
+Version: 1.3.1
 Summary: Simplified API for Data Science Agent Automation
 Author: DSLighting Team
 License: AGPL-3.0
{dslighting-1.1.8 → dslighting-1.3.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dslighting"
-version = "1.1.8"
+version = "1.3.1"
 description = "Simplified API for Data Science Agent Automation"
 readme = "README.md"
 requires-python = ">=3.10"