dslighting 1.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsat/__init__.py +3 -0
- dsat/benchmark/__init__.py +1 -0
- dsat/benchmark/benchmark.py +168 -0
- dsat/benchmark/datasci.py +291 -0
- dsat/benchmark/mle.py +777 -0
- dsat/benchmark/sciencebench.py +304 -0
- dsat/common/__init__.py +0 -0
- dsat/common/constants.py +11 -0
- dsat/common/exceptions.py +48 -0
- dsat/common/typing.py +19 -0
- dsat/config.py +79 -0
- dsat/models/__init__.py +3 -0
- dsat/models/candidates.py +16 -0
- dsat/models/formats.py +52 -0
- dsat/models/task.py +64 -0
- dsat/operators/__init__.py +0 -0
- dsat/operators/aflow_ops.py +90 -0
- dsat/operators/autokaggle_ops.py +170 -0
- dsat/operators/automind_ops.py +38 -0
- dsat/operators/base.py +22 -0
- dsat/operators/code.py +45 -0
- dsat/operators/dsagent_ops.py +123 -0
- dsat/operators/llm_basic.py +84 -0
- dsat/prompts/__init__.py +0 -0
- dsat/prompts/aflow_prompt.py +76 -0
- dsat/prompts/aide_prompt.py +52 -0
- dsat/prompts/autokaggle_prompt.py +290 -0
- dsat/prompts/automind_prompt.py +29 -0
- dsat/prompts/common.py +51 -0
- dsat/prompts/data_interpreter_prompt.py +82 -0
- dsat/prompts/dsagent_prompt.py +88 -0
- dsat/runner.py +554 -0
- dsat/services/__init__.py +0 -0
- dsat/services/data_analyzer.py +387 -0
- dsat/services/llm.py +486 -0
- dsat/services/llm_single.py +421 -0
- dsat/services/sandbox.py +386 -0
- dsat/services/states/__init__.py +0 -0
- dsat/services/states/autokaggle_state.py +43 -0
- dsat/services/states/base.py +14 -0
- dsat/services/states/dsa_log.py +13 -0
- dsat/services/states/experience.py +237 -0
- dsat/services/states/journal.py +153 -0
- dsat/services/states/operator_library.py +290 -0
- dsat/services/vdb.py +76 -0
- dsat/services/workspace.py +178 -0
- dsat/tasks/__init__.py +3 -0
- dsat/tasks/handlers.py +376 -0
- dsat/templates/open_ended/grade_template.py +107 -0
- dsat/tools/__init__.py +4 -0
- dsat/utils/__init__.py +0 -0
- dsat/utils/context.py +172 -0
- dsat/utils/dynamic_import.py +71 -0
- dsat/utils/parsing.py +33 -0
- dsat/workflows/__init__.py +12 -0
- dsat/workflows/base.py +53 -0
- dsat/workflows/factory.py +439 -0
- dsat/workflows/manual/__init__.py +0 -0
- dsat/workflows/manual/autokaggle_workflow.py +148 -0
- dsat/workflows/manual/data_interpreter_workflow.py +153 -0
- dsat/workflows/manual/deepanalyze_workflow.py +484 -0
- dsat/workflows/manual/dsagent_workflow.py +76 -0
- dsat/workflows/search/__init__.py +0 -0
- dsat/workflows/search/aflow_workflow.py +344 -0
- dsat/workflows/search/aide_workflow.py +283 -0
- dsat/workflows/search/automind_workflow.py +237 -0
- dsat/workflows/templates/__init__.py +0 -0
- dsat/workflows/templates/basic_kaggle_loop.py +71 -0
- dslighting/__init__.py +170 -0
- dslighting/core/__init__.py +13 -0
- dslighting/core/agent.py +646 -0
- dslighting/core/config_builder.py +318 -0
- dslighting/core/data_loader.py +422 -0
- dslighting/core/task_detector.py +422 -0
- dslighting/utils/__init__.py +19 -0
- dslighting/utils/defaults.py +151 -0
- dslighting-1.3.9.dist-info/METADATA +554 -0
- dslighting-1.3.9.dist-info/RECORD +80 -0
- dslighting-1.3.9.dist-info/WHEEL +5 -0
- dslighting-1.3.9.dist-info/top_level.txt +2 -0
dslighting/__init__.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DSLighting: Simplified API for Data Science Agent Automation
|
|
3
|
+
|
|
4
|
+
A progressive API that provides sensible defaults with full control when needed.
|
|
5
|
+
|
|
6
|
+
Quick Start:
|
|
7
|
+
>>> import dslighting
|
|
8
|
+
>>>
|
|
9
|
+
>>> # Simple usage
|
|
10
|
+
>>> data = dslighting.load_data("path/to/data")
|
|
11
|
+
>>> agent = dslighting.Agent()
|
|
12
|
+
>>> result = agent.run(data)
|
|
13
|
+
>>>
|
|
14
|
+
>>> # One-liner
|
|
15
|
+
>>> result = dslighting.run_agent("path/to/data")
|
|
16
|
+
|
|
17
|
+
Advanced Usage:
|
|
18
|
+
>>> agent = dslighting.Agent(
|
|
19
|
+
... workflow="autokaggle",
|
|
20
|
+
... model="gpt-4o",
|
|
21
|
+
... temperature=0.5,
|
|
22
|
+
... max_iterations=10
|
|
23
|
+
... )
|
|
24
|
+
>>> result = agent.run(data)
|
|
25
|
+
|
|
26
|
+
For more information, see: https://github.com/usail-hkust/dslighting
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
__version__ = "1.3.9"
|
|
30
|
+
__author__ = "DSLighting Team"
|
|
31
|
+
|
|
32
|
+
# Core API classes
|
|
33
|
+
from dslighting.core.agent import Agent, AgentResult
|
|
34
|
+
from dslighting.core.data_loader import DataLoader, LoadedData
|
|
35
|
+
|
|
36
|
+
# Convenience functions
|
|
37
|
+
def load_data(source, **kwargs):
    """
    Load ``source`` through a freshly constructed ``DataLoader``.

    Thin convenience wrapper: it instantiates ``DataLoader`` with no
    arguments and returns whatever ``DataLoader.load`` returns. All
    detection logic (task type, task_id, ...) lives in ``DataLoader``,
    not in this function.

    Args:
        source: Data source handed to ``DataLoader.load`` unchanged
            (documented by the package as a path, DataFrame, dict, etc.).
        **kwargs: Extra keyword arguments forwarded verbatim to
            ``DataLoader.load``.

    Returns:
        The result of ``DataLoader.load`` (documented by the package as a
        ``LoadedData`` carrying the detected task information and task_id).

    Examples:
        >>> # Load from competition path (task_id auto-detected)
        >>> data = dslighting.load_data("data/competitions/bike-sharing-demand")
        >>> print(data.task_id)  # "bike-sharing-demand"
        >>>
        >>> agent = dslighting.Agent()
        >>> result = agent.run(data)  # Auto-graded using task_id

        >>> # Load DataFrame (no task_id)
        >>> df = pd.read_csv("data.csv")
        >>> data = dslighting.load_data(df)

        >>> data = dslighting.load_data("data/house-prices")
        >>> print(data.task_detection.task_type)
        'kaggle'
    """
    return DataLoader().load(source, **kwargs)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def run_agent(data=None, task_id=None, data_dir=None, keep_workspace=False, keep_workspace_on_failure=True, **kwargs):
    """
    One-liner helper: build an ``Agent`` and immediately call ``run`` on it.

    Extra keyword arguments are split in two groups. Keys named
    ``task_id``, ``data_dir``, ``output_path`` or ``description`` are sent
    to ``Agent.run``; every other key is sent to ``Agent.__init__``.

    Args:
        data: Optional data source, passed as the first positional argument
            of ``Agent.run`` (``None`` is passed through as-is).
        task_id: Task/competition identifier (e.g., "bike-sharing-demand").
            Forwarded to ``Agent.run`` only when it is not ``None``.
        data_dir: Base data directory. Forwarded to ``Agent.run`` only when
            it is not ``None``; any default is decided by ``Agent.run``.
        keep_workspace: Always forwarded to ``Agent.__init__``
            (default: False).
        keep_workspace_on_failure: Always forwarded to ``Agent.__init__``
            (default: True).
        **kwargs: Additional parameters routed to ``Agent.__init__`` or
            ``Agent.run`` as described above.

    Returns:
        Whatever ``Agent.run`` returns (documented by the package as an
        ``AgentResult`` with output, metrics, and metadata).

    Examples:
        >>> # Recommended: using task_id
        >>> result = dslighting.run_agent(
        ...     task_id="bike-sharing-demand",
        ...     data_dir="data/competitions"
        ... )
        >>> print(f"Score: {result.score}, Cost: ${result.cost}")

        >>> # Legacy: using data path
        >>> result = dslighting.run_agent("data/titanic")

        >>> # With custom parameters
        >>> result = dslighting.run_agent(
        ...     task_id="bike-sharing-demand",
        ...     workflow="autokaggle",
        ...     model="gpt-4o",
        ...     keep_workspace=True  # Keep workspace for debugging
        ... )
    """
    # Keyword names that belong to Agent.run() rather than Agent.__init__().
    run_level_keys = ('task_id', 'data_dir', 'output_path', 'description')

    # Route the free-form keyword arguments, preserving their given order.
    init_options = {
        name: val for name, val in kwargs.items() if name not in run_level_keys
    }
    run_options = {
        name: val for name, val in kwargs.items() if name in run_level_keys
    }

    # The explicit parameters are only forwarded when the caller set them.
    for name, val in (('task_id', task_id), ('data_dir', data_dir)):
        if val is not None:
            run_options[name] = val

    # Workspace-retention flags are constructor options and always passed.
    init_options['keep_workspace'] = keep_workspace
    init_options['keep_workspace_on_failure'] = keep_workspace_on_failure

    return Agent(**init_options).run(data, **run_options)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Public API
|
|
136
|
+
__all__ = [
|
|
137
|
+
# Version info
|
|
138
|
+
"__version__",
|
|
139
|
+
"__author__",
|
|
140
|
+
|
|
141
|
+
# Core classes
|
|
142
|
+
"Agent",
|
|
143
|
+
"AgentResult",
|
|
144
|
+
"DataLoader",
|
|
145
|
+
"LoadedData",
|
|
146
|
+
|
|
147
|
+
# Convenience functions
|
|
148
|
+
"load_data",
|
|
149
|
+
"run_agent",
|
|
150
|
+
]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Import logging configuration
|
|
154
|
+
try:
    import logging
    from rich.logging import RichHandler

    # rich is importable: send root-logger output through a RichHandler.
    # Kept inside the try so an ImportError raised anywhere on this path
    # still lands in the plain-logging fallback below.
    logging.basicConfig(
        handlers=[RichHandler(show_path=False)],
        datefmt="%H:%M:%S",
        format="%(asctime)s [%(levelname)s] %(message)s",
        level="INFO",
    )
except ImportError:
    # rich is not installed: same level and format, default handler.
    logging.basicConfig(level="INFO", format="%(asctime)s [%(levelname)s] %(message)s")
|
|
dslighting/core/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core modules for DSLighting simplified API.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dslighting.core.agent import Agent, AgentResult
|
|
6
|
+
from dslighting.core.data_loader import DataLoader, LoadedData
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"Agent",
|
|
10
|
+
"AgentResult",
|
|
11
|
+
"DataLoader",
|
|
12
|
+
"LoadedData",
|
|
13
|
+
]
|