dslighting 1.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. dsat/__init__.py +3 -0
  2. dsat/benchmark/__init__.py +1 -0
  3. dsat/benchmark/benchmark.py +168 -0
  4. dsat/benchmark/datasci.py +291 -0
  5. dsat/benchmark/mle.py +777 -0
  6. dsat/benchmark/sciencebench.py +304 -0
  7. dsat/common/__init__.py +0 -0
  8. dsat/common/constants.py +11 -0
  9. dsat/common/exceptions.py +48 -0
  10. dsat/common/typing.py +19 -0
  11. dsat/config.py +79 -0
  12. dsat/models/__init__.py +3 -0
  13. dsat/models/candidates.py +16 -0
  14. dsat/models/formats.py +52 -0
  15. dsat/models/task.py +64 -0
  16. dsat/operators/__init__.py +0 -0
  17. dsat/operators/aflow_ops.py +90 -0
  18. dsat/operators/autokaggle_ops.py +170 -0
  19. dsat/operators/automind_ops.py +38 -0
  20. dsat/operators/base.py +22 -0
  21. dsat/operators/code.py +45 -0
  22. dsat/operators/dsagent_ops.py +123 -0
  23. dsat/operators/llm_basic.py +84 -0
  24. dsat/prompts/__init__.py +0 -0
  25. dsat/prompts/aflow_prompt.py +76 -0
  26. dsat/prompts/aide_prompt.py +52 -0
  27. dsat/prompts/autokaggle_prompt.py +290 -0
  28. dsat/prompts/automind_prompt.py +29 -0
  29. dsat/prompts/common.py +51 -0
  30. dsat/prompts/data_interpreter_prompt.py +82 -0
  31. dsat/prompts/dsagent_prompt.py +88 -0
  32. dsat/runner.py +554 -0
  33. dsat/services/__init__.py +0 -0
  34. dsat/services/data_analyzer.py +387 -0
  35. dsat/services/llm.py +486 -0
  36. dsat/services/llm_single.py +421 -0
  37. dsat/services/sandbox.py +386 -0
  38. dsat/services/states/__init__.py +0 -0
  39. dsat/services/states/autokaggle_state.py +43 -0
  40. dsat/services/states/base.py +14 -0
  41. dsat/services/states/dsa_log.py +13 -0
  42. dsat/services/states/experience.py +237 -0
  43. dsat/services/states/journal.py +153 -0
  44. dsat/services/states/operator_library.py +290 -0
  45. dsat/services/vdb.py +76 -0
  46. dsat/services/workspace.py +178 -0
  47. dsat/tasks/__init__.py +3 -0
  48. dsat/tasks/handlers.py +376 -0
  49. dsat/templates/open_ended/grade_template.py +107 -0
  50. dsat/tools/__init__.py +4 -0
  51. dsat/utils/__init__.py +0 -0
  52. dsat/utils/context.py +172 -0
  53. dsat/utils/dynamic_import.py +71 -0
  54. dsat/utils/parsing.py +33 -0
  55. dsat/workflows/__init__.py +12 -0
  56. dsat/workflows/base.py +53 -0
  57. dsat/workflows/factory.py +439 -0
  58. dsat/workflows/manual/__init__.py +0 -0
  59. dsat/workflows/manual/autokaggle_workflow.py +148 -0
  60. dsat/workflows/manual/data_interpreter_workflow.py +153 -0
  61. dsat/workflows/manual/deepanalyze_workflow.py +484 -0
  62. dsat/workflows/manual/dsagent_workflow.py +76 -0
  63. dsat/workflows/search/__init__.py +0 -0
  64. dsat/workflows/search/aflow_workflow.py +344 -0
  65. dsat/workflows/search/aide_workflow.py +283 -0
  66. dsat/workflows/search/automind_workflow.py +237 -0
  67. dsat/workflows/templates/__init__.py +0 -0
  68. dsat/workflows/templates/basic_kaggle_loop.py +71 -0
  69. dslighting/__init__.py +170 -0
  70. dslighting/core/__init__.py +13 -0
  71. dslighting/core/agent.py +646 -0
  72. dslighting/core/config_builder.py +318 -0
  73. dslighting/core/data_loader.py +422 -0
  74. dslighting/core/task_detector.py +422 -0
  75. dslighting/utils/__init__.py +19 -0
  76. dslighting/utils/defaults.py +151 -0
  77. dslighting-1.3.9.dist-info/METADATA +554 -0
  78. dslighting-1.3.9.dist-info/RECORD +80 -0
  79. dslighting-1.3.9.dist-info/WHEEL +5 -0
  80. dslighting-1.3.9.dist-info/top_level.txt +2 -0
@@ -0,0 +1,318 @@
1
+ """
2
+ Configuration building and management.
3
+
4
+ This module handles merging of default configurations, environment variables,
5
+ and user parameters to create the final DSATConfig.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ from typing import Any, Dict, Optional
12
+
13
+ from dsat.config import (
14
+ DSATConfig,
15
+ LLMConfig,
16
+ RunConfig,
17
+ WorkflowConfig,
18
+ AgentConfig,
19
+ SandboxConfig,
20
+ )
21
+
22
+ from dslighting.utils.defaults import (
23
+ DEFAULT_CONFIG,
24
+ DEFAULT_WORKSPACE_DIR,
25
+ ENV_API_KEY,
26
+ ENV_API_BASE,
27
+ ENV_LLM_MODEL,
28
+ ENV_LLM_PROVIDER,
29
+ ENV_LLM_MODEL_CONFIGS,
30
+ ENV_LLM_TEMPERATURE,
31
+ ENV_DSLIGHTING_DEFAULT_WORKFLOW,
32
+ ENV_DSLIGHTING_WORKSPACE_DIR,
33
+ )
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
+ class ConfigBuilder:
39
+ """
40
+ Build DSATConfig by merging defaults, environment variables, and user parameters.
41
+
42
+ Priority order (highest to lowest):
43
+ 1. User parameters (function arguments)
44
+ 2. Environment variables
45
+ 3. Default configuration
46
+ """
47
+
48
def __init__(self) -> None:
    """Attach the module-level ``logger`` to the instance as ``self.logger``."""
    self.logger = logger
50
+
51
def build_config(
    self,
    workflow: str = None,
    model: str = None,
    api_key: str = None,
    api_base: str = None,
    provider: str = None,
    temperature: float = None,
    max_iterations: int = None,
    num_drafts: int = None,
    workspace_dir: str = None,
    run_name: str = None,
    keep_workspace: bool = None,
    keep_workspace_on_failure: bool = None,
    **kwargs
) -> DSATConfig:
    """
    Build DSATConfig by merging all configuration sources.

    Sources are layered from lowest to highest priority:
    ``DEFAULT_CONFIG`` -> environment variables -> per-model overrides
    (``_load_model_configs``) -> explicit user parameters.

    Args:
        workflow: Workflow name (aide, autokaggle, etc.)
        model: LLM model name
        api_key: API key for LLM
        api_base: API base URL
        provider: LLM provider (for LiteLLM)
        temperature: LLM temperature
        max_iterations: Maximum agent iterations
        num_drafts: Number of drafts to generate
        workspace_dir: Workspace directory
        run_name: Name for this run
        keep_workspace: Keep workspace after completion
        keep_workspace_on_failure: Keep workspace on failure
        **kwargs: Additional parameters (stored under ``run.parameters``)

    Returns:
        DSATConfig with all configurations merged
    """
    # 1. Start with defaults
    config = DEFAULT_CONFIG.copy()

    # 2. Load environment overrides
    env_config = self._load_env_config()
    config = self._deep_merge(config, env_config)

    # 3. Apply user parameters
    user_config = self._build_user_config(
        workflow=workflow,
        model=model,
        api_key=api_key,
        api_base=api_base,
        provider=provider,
        temperature=temperature,
        max_iterations=max_iterations,
        num_drafts=num_drafts,
        workspace_dir=workspace_dir,
        run_name=run_name,
        keep_workspace=keep_workspace,
        keep_workspace_on_failure=keep_workspace_on_failure,
        **kwargs
    )
    config = self._deep_merge(config, user_config)

    # 4. Load model-specific configs if any. The model name must be resolved
    #    from the fully merged config first, because it may come from any layer.
    model_name = config.get("llm", {}).get("model")
    if model_name:
        model_override = self._load_model_configs().get(model_name)
        if model_override:
            llm_settings = self._deep_merge(config["llm"], model_override)
            # Model configs have lower priority than direct user params:
            # re-apply the user's explicit LLM arguments on top so that an
            # api_key/api_base/etc. passed by the caller is never clobbered
            # by the per-model override.
            config["llm"] = self._deep_merge(
                llm_settings, user_config.get("llm", {})
            )

    # 5. Convert to DSATConfig objects
    return self._create_dsat_config(config)
124
+
125
def _load_env_config(self) -> Dict[str, Any]:
    """
    Collect configuration overrides from environment variables.

    Only variables that are set to a non-empty value contribute; sections
    with no contributing variable are omitted from the result.

    Returns:
        Nested dict with optional ``llm``, ``workflow`` and ``run`` sections.
    """
    env_config: Dict[str, Any] = {}

    # LLM settings that are copied verbatim as strings.
    llm_settings: Dict[str, Any] = {}
    plain_llm_vars = (
        ("api_key", ENV_API_KEY),
        ("api_base", ENV_API_BASE),
        ("model", ENV_LLM_MODEL),
        ("provider", ENV_LLM_PROVIDER),
    )
    for field, env_name in plain_llm_vars:
        value = os.getenv(env_name)
        if value:
            llm_settings[field] = value

    # Temperature must parse as a float; an unparsable value is skipped.
    raw_temperature = os.getenv(ENV_LLM_TEMPERATURE)
    if raw_temperature:
        try:
            llm_settings["temperature"] = float(raw_temperature)
        except ValueError:
            logger.warning(f"Invalid {ENV_LLM_TEMPERATURE} value")

    if llm_settings:
        env_config["llm"] = llm_settings

    # DSLighting settings
    default_workflow = os.getenv(ENV_DSLIGHTING_DEFAULT_WORKFLOW)
    if default_workflow:
        env_config["workflow"] = {"name": default_workflow}

    workspace = os.getenv(ENV_DSLIGHTING_WORKSPACE_DIR)
    if workspace:
        env_config["run"] = {"parameters": {"workspace_dir": workspace}}

    return env_config
160
+
161
def _load_model_configs(self) -> Dict[str, Dict[str, Any]]:
    """
    Load per-model overrides from LLM_MODEL_CONFIGS env var.

    Expected format (JSON object):
    {
        "<model_name>": {
            "api_key": "sk-..." | ["sk-1", "sk-2"],
            "api_base": "https://.../v1",
            "provider": "siliconflow",
            "temperature": 0.7
        }
    }

    When ``api_key`` is a list, only its first element is kept; an empty
    list is dropped so that it cannot shadow a key from another source.
    Entries whose value is not a JSON object are ignored.

    Returns:
        Mapping of model name to its override dict; empty when the variable
        is unset, is not valid JSON, or is not a JSON object.
    """
    raw = os.getenv(ENV_LLM_MODEL_CONFIGS)
    if not raw:
        return {}

    try:
        parsed = json.loads(raw)
    except (ValueError, RecursionError) as exc:
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically nested input. Either way this stays best-effort.
        logger.warning(f"Failed to parse LLM_MODEL_CONFIGS as JSON: {exc}")
        return {}

    if not isinstance(parsed, dict):
        logger.warning("LLM_MODEL_CONFIGS must be a JSON object")
        return {}

    # Process each model config
    result = {}
    for name, override in parsed.items():
        if not isinstance(name, str) or not isinstance(override, dict):
            continue

        api_key = override.get("api_key")
        if isinstance(api_key, list):
            override = override.copy()  # Shallow copy to avoid mutating original
            if api_key:
                # Handle api_key as list (take the first one). The count is
                # taken from the list itself, not from the selected key.
                override["api_key"] = api_key[0]
                logger.debug(
                    f"Model '{name}': using first API key from list of {len(api_key)}"
                )
            else:
                del override["api_key"]
                logger.warning(f"Model '{name}': ignoring empty api_key list")

        result[name] = override

    return result
205
+
206
+ def _build_user_config(
207
+ self,
208
+ workflow: str = None,
209
+ model: str = None,
210
+ api_key: str = None,
211
+ api_base: str = None,
212
+ provider: str = None,
213
+ temperature: float = None,
214
+ max_iterations: int = None,
215
+ num_drafts: int = None,
216
+ workspace_dir: str = None,
217
+ run_name: str = None,
218
+ keep_workspace: bool = None,
219
+ keep_workspace_on_failure: bool = None,
220
+ **kwargs
221
+ ) -> Dict[str, Any]:
222
+ """Build user configuration from parameters."""
223
+ config = {}
224
+
225
+ if workflow is not None:
226
+ config.setdefault("workflow", {})["name"] = workflow
227
+
228
+ if model is not None:
229
+ config.setdefault("llm", {})["model"] = model
230
+
231
+ if api_key is not None:
232
+ config.setdefault("llm", {})["api_key"] = api_key
233
+
234
+ if api_base is not None:
235
+ config.setdefault("llm", {})["api_base"] = api_base
236
+
237
+ if provider is not None:
238
+ config.setdefault("llm", {})["provider"] = provider
239
+
240
+ if temperature is not None:
241
+ config.setdefault("llm", {})["temperature"] = temperature
242
+
243
+ if max_iterations is not None:
244
+ config.setdefault("agent", {}).setdefault("search", {})["max_iterations"] = max_iterations
245
+ config.setdefault("run", {})["total_steps"] = max_iterations
246
+
247
+ if num_drafts is not None:
248
+ config.setdefault("agent", {}).setdefault("search", {})["num_drafts"] = num_drafts
249
+
250
+ if run_name is not None:
251
+ config.setdefault("run", {})["name"] = run_name
252
+
253
+ if workspace_dir is not None:
254
+ config.setdefault("run", {}).setdefault("parameters", {})["workspace_dir"] = workspace_dir
255
+
256
+ if keep_workspace is not None:
257
+ config.setdefault("run", {})["keep_all_workspaces"] = keep_workspace
258
+
259
+ if keep_workspace_on_failure is not None:
260
+ config.setdefault("run", {})["keep_workspace_on_failure"] = keep_workspace_on_failure
261
+
262
+ # Additional kwargs are added to run.parameters
263
+ if kwargs:
264
+ config.setdefault("run", {}).setdefault("parameters", {}).update(kwargs)
265
+
266
+ return config
267
+
268
def _create_dsat_config(self, config_dict: Dict[str, Any]) -> DSATConfig:
    """
    Turn a merged configuration dict into a DSATConfig object.

    Each top-level section (``llm``, ``workflow``, ``run``, ``agent``,
    ``sandbox``) is expanded as keyword arguments into its config class;
    a missing section is treated as an empty dict.
    """
    section_types = (
        ("llm", LLMConfig),
        ("workflow", WorkflowConfig),
        ("run", RunConfig),
        ("agent", AgentConfig),
        ("sandbox", SandboxConfig),
    )
    built_sections = {
        section: config_cls(**config_dict.get(section, {}))
        for section, config_cls in section_types
    }
    return DSATConfig(**built_sections)
298
+
299
+ def _deep_merge(self, base: Dict, update: Dict) -> Dict:
300
+ """
301
+ Deep merge two dictionaries.
302
+
303
+ Args:
304
+ base: Base dictionary
305
+ update: Dictionary with updates (higher priority)
306
+
307
+ Returns:
308
+ Merged dictionary
309
+ """
310
+ result = base.copy()
311
+
312
+ for key, value in update.items():
313
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
314
+ result[key] = self._deep_merge(result[key], value)
315
+ else:
316
+ result[key] = value
317
+
318
+ return result