dslighting 1.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsat/__init__.py +3 -0
- dsat/benchmark/__init__.py +1 -0
- dsat/benchmark/benchmark.py +168 -0
- dsat/benchmark/datasci.py +291 -0
- dsat/benchmark/mle.py +777 -0
- dsat/benchmark/sciencebench.py +304 -0
- dsat/common/__init__.py +0 -0
- dsat/common/constants.py +11 -0
- dsat/common/exceptions.py +48 -0
- dsat/common/typing.py +19 -0
- dsat/config.py +79 -0
- dsat/models/__init__.py +3 -0
- dsat/models/candidates.py +16 -0
- dsat/models/formats.py +52 -0
- dsat/models/task.py +64 -0
- dsat/operators/__init__.py +0 -0
- dsat/operators/aflow_ops.py +90 -0
- dsat/operators/autokaggle_ops.py +170 -0
- dsat/operators/automind_ops.py +38 -0
- dsat/operators/base.py +22 -0
- dsat/operators/code.py +45 -0
- dsat/operators/dsagent_ops.py +123 -0
- dsat/operators/llm_basic.py +84 -0
- dsat/prompts/__init__.py +0 -0
- dsat/prompts/aflow_prompt.py +76 -0
- dsat/prompts/aide_prompt.py +52 -0
- dsat/prompts/autokaggle_prompt.py +290 -0
- dsat/prompts/automind_prompt.py +29 -0
- dsat/prompts/common.py +51 -0
- dsat/prompts/data_interpreter_prompt.py +82 -0
- dsat/prompts/dsagent_prompt.py +88 -0
- dsat/runner.py +554 -0
- dsat/services/__init__.py +0 -0
- dsat/services/data_analyzer.py +387 -0
- dsat/services/llm.py +486 -0
- dsat/services/llm_single.py +421 -0
- dsat/services/sandbox.py +386 -0
- dsat/services/states/__init__.py +0 -0
- dsat/services/states/autokaggle_state.py +43 -0
- dsat/services/states/base.py +14 -0
- dsat/services/states/dsa_log.py +13 -0
- dsat/services/states/experience.py +237 -0
- dsat/services/states/journal.py +153 -0
- dsat/services/states/operator_library.py +290 -0
- dsat/services/vdb.py +76 -0
- dsat/services/workspace.py +178 -0
- dsat/tasks/__init__.py +3 -0
- dsat/tasks/handlers.py +376 -0
- dsat/templates/open_ended/grade_template.py +107 -0
- dsat/tools/__init__.py +4 -0
- dsat/utils/__init__.py +0 -0
- dsat/utils/context.py +172 -0
- dsat/utils/dynamic_import.py +71 -0
- dsat/utils/parsing.py +33 -0
- dsat/workflows/__init__.py +12 -0
- dsat/workflows/base.py +53 -0
- dsat/workflows/factory.py +439 -0
- dsat/workflows/manual/__init__.py +0 -0
- dsat/workflows/manual/autokaggle_workflow.py +148 -0
- dsat/workflows/manual/data_interpreter_workflow.py +153 -0
- dsat/workflows/manual/deepanalyze_workflow.py +484 -0
- dsat/workflows/manual/dsagent_workflow.py +76 -0
- dsat/workflows/search/__init__.py +0 -0
- dsat/workflows/search/aflow_workflow.py +344 -0
- dsat/workflows/search/aide_workflow.py +283 -0
- dsat/workflows/search/automind_workflow.py +237 -0
- dsat/workflows/templates/__init__.py +0 -0
- dsat/workflows/templates/basic_kaggle_loop.py +71 -0
- dslighting/__init__.py +170 -0
- dslighting/core/__init__.py +13 -0
- dslighting/core/agent.py +646 -0
- dslighting/core/config_builder.py +318 -0
- dslighting/core/data_loader.py +422 -0
- dslighting/core/task_detector.py +422 -0
- dslighting/utils/__init__.py +19 -0
- dslighting/utils/defaults.py +151 -0
- dslighting-1.3.9.dist-info/METADATA +554 -0
- dslighting-1.3.9.dist-info/RECORD +80 -0
- dslighting-1.3.9.dist-info/WHEEL +5 -0
- dslighting-1.3.9.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration building and management.
|
|
3
|
+
|
|
4
|
+
This module handles merging of default configurations, environment variables,
|
|
5
|
+
and user parameters to create the final DSATConfig.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
|
+
|
|
13
|
+
from dsat.config import (
|
|
14
|
+
DSATConfig,
|
|
15
|
+
LLMConfig,
|
|
16
|
+
RunConfig,
|
|
17
|
+
WorkflowConfig,
|
|
18
|
+
AgentConfig,
|
|
19
|
+
SandboxConfig,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from dslighting.utils.defaults import (
|
|
23
|
+
DEFAULT_CONFIG,
|
|
24
|
+
DEFAULT_WORKSPACE_DIR,
|
|
25
|
+
ENV_API_KEY,
|
|
26
|
+
ENV_API_BASE,
|
|
27
|
+
ENV_LLM_MODEL,
|
|
28
|
+
ENV_LLM_PROVIDER,
|
|
29
|
+
ENV_LLM_MODEL_CONFIGS,
|
|
30
|
+
ENV_LLM_TEMPERATURE,
|
|
31
|
+
ENV_DSLIGHTING_DEFAULT_WORKFLOW,
|
|
32
|
+
ENV_DSLIGHTING_WORKSPACE_DIR,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)


class ConfigBuilder:
    """
    Build DSATConfig by merging defaults, environment variables, and user parameters.

    Priority order (highest to lowest):
    1. User parameters (function arguments)
    2. Per-model overrides from the ENV_LLM_MODEL_CONFIGS variable ("llm" section only)
    3. Environment variables
    4. Default configuration
    """

    def __init__(self):
        self.logger = logger

    def build_config(
        self,
        workflow: Optional[str] = None,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        provider: Optional[str] = None,
        temperature: Optional[float] = None,
        max_iterations: Optional[int] = None,
        num_drafts: Optional[int] = None,
        workspace_dir: Optional[str] = None,
        run_name: Optional[str] = None,
        keep_workspace: Optional[bool] = None,
        keep_workspace_on_failure: Optional[bool] = None,
        **kwargs
    ) -> "DSATConfig":
        """
        Build DSATConfig by merging all configuration sources.

        Args:
            workflow: Workflow name (aide, autokaggle, etc.)
            model: LLM model name
            api_key: API key for LLM
            api_base: API base URL
            provider: LLM provider (for LiteLLM)
            temperature: LLM temperature
            max_iterations: Maximum agent iterations
            num_drafts: Number of drafts to generate
            workspace_dir: Workspace directory
            run_name: Name for this run
            keep_workspace: Keep workspace after completion
            keep_workspace_on_failure: Keep workspace on failure
            **kwargs: Additional parameters (stored under run.parameters)

        Returns:
            DSATConfig with all configurations merged
        """
        # 1-2. Defaults overlaid with environment overrides. _deep_merge
        # copies nested dicts, so DEFAULT_CONFIG itself is never aliased.
        config = self._deep_merge(DEFAULT_CONFIG, self._load_env_config())

        # 3. Apply user parameters
        user_config = self._build_user_config(
            workflow=workflow,
            model=model,
            api_key=api_key,
            api_base=api_base,
            provider=provider,
            temperature=temperature,
            max_iterations=max_iterations,
            num_drafts=num_drafts,
            workspace_dir=workspace_dir,
            run_name=run_name,
            keep_workspace=keep_workspace,
            keep_workspace_on_failure=keep_workspace_on_failure,
            **kwargs
        )
        config = self._deep_merge(config, user_config)

        # 4. Load model-specific configs if any. They sit above env/defaults
        # but below direct user params, so the user's "llm" values are
        # re-applied on top of the override.
        llm_settings = config.get("llm", {})
        if llm_settings.get("model"):
            config["llm"] = self._layer_model_override(
                llm_settings,
                self._load_model_configs(),
                user_config.get("llm", {}),
            )

        # 5. Convert to DSATConfig objects
        return self._create_dsat_config(config)

    def _layer_model_override(
        self,
        llm_settings: Dict[str, Any],
        model_configs: Dict[str, Dict[str, Any]],
        user_llm: Dict[str, Any],
    ) -> Dict[str, Any]:
        """
        Apply the per-model override for ``llm_settings["model"]``.

        Args:
            llm_settings: Merged "llm" section (defaults + env + user)
            model_configs: Mapping of model name to override dict
            user_llm: "llm" values the caller passed explicitly

        Returns:
            ``llm_settings`` unchanged when no override exists for the model,
            otherwise a new dict where the override wins over defaults/env
            values and ``user_llm`` wins over the override.
        """
        override = model_configs.get(llm_settings.get("model"))
        if not override:
            return llm_settings
        with_override = self._deep_merge(llm_settings, override)
        return self._deep_merge(with_override, user_llm)

    def _load_env_config(self) -> Dict[str, Any]:
        """Load configuration from environment variables.

        Only variables that are set to a non-empty value contribute.
        """
        config: Dict[str, Any] = {}

        # LLM settings that are copied through as plain strings
        llm_string_vars = (
            ("api_key", ENV_API_KEY),
            ("api_base", ENV_API_BASE),
            ("model", ENV_LLM_MODEL),
            ("provider", ENV_LLM_PROVIDER),
        )
        for field, env_name in llm_string_vars:
            value = os.getenv(env_name)
            if value:
                config.setdefault("llm", {})[field] = value

        # Temperature must parse as a float; an invalid value is ignored
        raw_temperature = os.getenv(ENV_LLM_TEMPERATURE)
        if raw_temperature:
            try:
                parsed_temperature = float(raw_temperature)
            except ValueError:
                logger.warning(f"Invalid {ENV_LLM_TEMPERATURE} value")
            else:
                config.setdefault("llm", {})["temperature"] = parsed_temperature

        # DSLighting settings
        default_workflow = os.getenv(ENV_DSLIGHTING_DEFAULT_WORKFLOW)
        if default_workflow:
            config.setdefault("workflow", {})["name"] = default_workflow

        env_workspace_dir = os.getenv(ENV_DSLIGHTING_WORKSPACE_DIR)
        if env_workspace_dir:
            run_parameters = config.setdefault("run", {}).setdefault("parameters", {})
            run_parameters["workspace_dir"] = env_workspace_dir

        return config

    def _load_model_configs(self) -> Dict[str, Dict[str, Any]]:
        """
        Load per-model overrides from the ENV_LLM_MODEL_CONFIGS env var.

        Expected format (JSON object):
        {
            "<model_name>": {
                "api_key": "sk-..." | ["sk-1", "sk-2"],
                "api_base": "https://.../v1",
                "provider": "siliconflow",
                "temperature": 0.7
            }
        }

        Returns:
            Mapping of model name to override dict; empty when the variable
            is unset, empty, or not a valid JSON object.
        """
        raw = os.getenv(ENV_LLM_MODEL_CONFIGS)
        if not raw:
            return {}
        return self._parse_model_configs(raw)

    def _parse_model_configs(self, raw: str) -> Dict[str, Dict[str, Any]]:
        """
        Parse and normalise the raw JSON text of the per-model overrides.

        Entries whose value is not a JSON object are skipped. An ``api_key``
        given as a list is reduced to its first element; an empty list is
        dropped so that it cannot replace a usable key from another source.

        Args:
            raw: JSON text (see _load_model_configs for the format)

        Returns:
            Mapping of model name to override dict; empty on parse failure
        """
        try:
            parsed = json.loads(raw)
        except (ValueError, RecursionError) as exc:
            # json.JSONDecodeError is a ValueError; bad input is best-effort
            logger.warning(f"Failed to parse LLM_MODEL_CONFIGS as JSON: {exc}")
            return {}

        if not isinstance(parsed, dict):
            logger.warning("LLM_MODEL_CONFIGS must be a JSON object")
            return {}

        result: Dict[str, Dict[str, Any]] = {}
        for name, entry in parsed.items():
            if not isinstance(name, str) or not isinstance(entry, dict):
                continue

            keys = entry.get("api_key")
            if isinstance(keys, list):
                entry = dict(entry)  # Shallow copy to avoid mutating parsed data
                if keys:
                    entry["api_key"] = keys[0]
                    logger.debug(
                        "Model '%s': using first API key from list of %d",
                        name,
                        len(keys),
                    )
                else:
                    del entry["api_key"]
                    logger.warning("Model '%s': ignoring empty api_key list", name)

            result[name] = entry

        return result

    def _build_user_config(
        self,
        workflow: Optional[str] = None,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        provider: Optional[str] = None,
        temperature: Optional[float] = None,
        max_iterations: Optional[int] = None,
        num_drafts: Optional[int] = None,
        workspace_dir: Optional[str] = None,
        run_name: Optional[str] = None,
        keep_workspace: Optional[bool] = None,
        keep_workspace_on_failure: Optional[bool] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Build user configuration from parameters.

        Parameters left as None are omitted, so they never override a value
        coming from a lower-priority source.
        """
        config: Dict[str, Any] = {}

        if workflow is not None:
            config.setdefault("workflow", {})["name"] = workflow

        llm_params = (
            ("model", model),
            ("api_key", api_key),
            ("api_base", api_base),
            ("provider", provider),
            ("temperature", temperature),
        )
        for field, value in llm_params:
            if value is not None:
                config.setdefault("llm", {})[field] = value

        if max_iterations is not None:
            # The iteration budget is mirrored into both sections
            config.setdefault("agent", {}).setdefault("search", {})["max_iterations"] = max_iterations
            config.setdefault("run", {})["total_steps"] = max_iterations

        if num_drafts is not None:
            config.setdefault("agent", {}).setdefault("search", {})["num_drafts"] = num_drafts

        if run_name is not None:
            config.setdefault("run", {})["name"] = run_name

        if workspace_dir is not None:
            config.setdefault("run", {}).setdefault("parameters", {})["workspace_dir"] = workspace_dir

        if keep_workspace is not None:
            config.setdefault("run", {})["keep_all_workspaces"] = keep_workspace

        if keep_workspace_on_failure is not None:
            config.setdefault("run", {})["keep_workspace_on_failure"] = keep_workspace_on_failure

        # Additional kwargs are added to run.parameters
        if kwargs:
            config.setdefault("run", {}).setdefault("parameters", {}).update(kwargs)

        return config

    def _create_dsat_config(self, config_dict: Dict[str, Any]) -> "DSATConfig":
        """Convert configuration dict to DSATConfig object.

        Each top-level section ("llm", "workflow", "run", "agent", "sandbox")
        is expanded as keyword arguments into its config class; a missing
        section is treated as an empty dict.
        """
        return DSATConfig(
            llm=LLMConfig(**config_dict.get("llm", {})),
            workflow=WorkflowConfig(**config_dict.get("workflow", {})),
            run=RunConfig(**config_dict.get("run", {})),
            agent=AgentConfig(**config_dict.get("agent", {})),
            sandbox=SandboxConfig(**config_dict.get("sandbox", {})),
        )

    def _deep_merge(self, base: Dict, update: Dict) -> Dict:
        """
        Deep merge two dictionaries.

        Neither input is modified, and nested dicts in the result are fresh
        copies, so mutating the result cannot leak back into ``base`` or
        ``update``. Non-dict values are shared by reference.

        Args:
            base: Base dictionary
            update: Dictionary with updates (higher priority)

        Returns:
            Merged dictionary
        """
        result = {
            key: self._deep_merge(value, {}) if isinstance(value, dict) else value
            for key, value in base.items()
        }

        for key, value in update.items():
            if isinstance(value, dict):
                current = result.get(key)
                # A non-dict (or missing) base value is replaced outright
                merge_target = current if isinstance(current, dict) else {}
                result[key] = self._deep_merge(merge_target, value)
            else:
                result[key] = value

        return result
|