dslighting-1.3.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. dsat/__init__.py +3 -0
  2. dsat/benchmark/__init__.py +1 -0
  3. dsat/benchmark/benchmark.py +168 -0
  4. dsat/benchmark/datasci.py +291 -0
  5. dsat/benchmark/mle.py +777 -0
  6. dsat/benchmark/sciencebench.py +304 -0
  7. dsat/common/__init__.py +0 -0
  8. dsat/common/constants.py +11 -0
  9. dsat/common/exceptions.py +48 -0
  10. dsat/common/typing.py +19 -0
  11. dsat/config.py +79 -0
  12. dsat/models/__init__.py +3 -0
  13. dsat/models/candidates.py +16 -0
  14. dsat/models/formats.py +52 -0
  15. dsat/models/task.py +64 -0
  16. dsat/operators/__init__.py +0 -0
  17. dsat/operators/aflow_ops.py +90 -0
  18. dsat/operators/autokaggle_ops.py +170 -0
  19. dsat/operators/automind_ops.py +38 -0
  20. dsat/operators/base.py +22 -0
  21. dsat/operators/code.py +45 -0
  22. dsat/operators/dsagent_ops.py +123 -0
  23. dsat/operators/llm_basic.py +84 -0
  24. dsat/prompts/__init__.py +0 -0
  25. dsat/prompts/aflow_prompt.py +76 -0
  26. dsat/prompts/aide_prompt.py +52 -0
  27. dsat/prompts/autokaggle_prompt.py +290 -0
  28. dsat/prompts/automind_prompt.py +29 -0
  29. dsat/prompts/common.py +51 -0
  30. dsat/prompts/data_interpreter_prompt.py +82 -0
  31. dsat/prompts/dsagent_prompt.py +88 -0
  32. dsat/runner.py +554 -0
  33. dsat/services/__init__.py +0 -0
  34. dsat/services/data_analyzer.py +387 -0
  35. dsat/services/llm.py +486 -0
  36. dsat/services/llm_single.py +421 -0
  37. dsat/services/sandbox.py +386 -0
  38. dsat/services/states/__init__.py +0 -0
  39. dsat/services/states/autokaggle_state.py +43 -0
  40. dsat/services/states/base.py +14 -0
  41. dsat/services/states/dsa_log.py +13 -0
  42. dsat/services/states/experience.py +237 -0
  43. dsat/services/states/journal.py +153 -0
  44. dsat/services/states/operator_library.py +290 -0
  45. dsat/services/vdb.py +76 -0
  46. dsat/services/workspace.py +178 -0
  47. dsat/tasks/__init__.py +3 -0
  48. dsat/tasks/handlers.py +376 -0
  49. dsat/templates/open_ended/grade_template.py +107 -0
  50. dsat/tools/__init__.py +4 -0
  51. dsat/utils/__init__.py +0 -0
  52. dsat/utils/context.py +172 -0
  53. dsat/utils/dynamic_import.py +71 -0
  54. dsat/utils/parsing.py +33 -0
  55. dsat/workflows/__init__.py +12 -0
  56. dsat/workflows/base.py +53 -0
  57. dsat/workflows/factory.py +439 -0
  58. dsat/workflows/manual/__init__.py +0 -0
  59. dsat/workflows/manual/autokaggle_workflow.py +148 -0
  60. dsat/workflows/manual/data_interpreter_workflow.py +153 -0
  61. dsat/workflows/manual/deepanalyze_workflow.py +484 -0
  62. dsat/workflows/manual/dsagent_workflow.py +76 -0
  63. dsat/workflows/search/__init__.py +0 -0
  64. dsat/workflows/search/aflow_workflow.py +344 -0
  65. dsat/workflows/search/aide_workflow.py +283 -0
  66. dsat/workflows/search/automind_workflow.py +237 -0
  67. dsat/workflows/templates/__init__.py +0 -0
  68. dsat/workflows/templates/basic_kaggle_loop.py +71 -0
  69. dslighting/__init__.py +170 -0
  70. dslighting/core/__init__.py +13 -0
  71. dslighting/core/agent.py +646 -0
  72. dslighting/core/config_builder.py +318 -0
  73. dslighting/core/data_loader.py +422 -0
  74. dslighting/core/task_detector.py +422 -0
  75. dslighting/utils/__init__.py +19 -0
  76. dslighting/utils/defaults.py +151 -0
  77. dslighting-1.3.9.dist-info/METADATA +554 -0
  78. dslighting-1.3.9.dist-info/RECORD +80 -0
  79. dslighting-1.3.9.dist-info/WHEEL +5 -0
  80. dslighting-1.3.9.dist-info/top_level.txt +2 -0
dslighting/core/task_detector.py
@@ -0,0 +1,422 @@
+ """
+ Task detection and automatic task type inference.
+
+ This module provides intelligent detection of data science task types
+ from various data sources (directories, files, DataFrames, etc.).
+ """
+
+ import logging
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict, Optional
+
+ import pandas as pd
+
+ from dslighting.utils.defaults import WORKFLOW_RECOMMENDATIONS
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class TaskDetection:
+     """
+     Result of task detection.
+
+     Attributes:
+         task_type: Detected task type (kaggle, qa, datasci, open_ended)
+         task_mode: Task mode (standard_ml, open_ended)
+         data_dir: Data directory path (if applicable)
+         description: Task description (if found)
+         io_instructions: I/O instructions
+         recommended_workflow: Recommended workflow for this task
+         confidence: Detection confidence (0-1)
+         metadata: Additional detection metadata
+     """
+     task_type: str
+     task_mode: str
+     data_dir: Optional[Path] = None
+     description: str = ""
+     io_instructions: str = ""
+     recommended_workflow: str = "aide"
+     confidence: float = 0.8
+     metadata: Optional[Dict[str, Any]] = None
+
+     def __post_init__(self):
+         if self.metadata is None:
+             self.metadata = {}
+
+
+ class TaskDetector:
+     """
+     Automatically detect task types from data sources.
+
+     This class analyzes directory structures, file types, and content
+     to determine the most appropriate task type and workflow.
+     """
+
+     def __init__(self):
+         self.logger = logger
+
+     def detect(self, source: Any) -> TaskDetection:
+         """
+         Detect task type from a data source.
+
+         Args:
+             source: Data source (path, DataFrame, dict, etc.)
+
+         Returns:
+             TaskDetection with inferred task information
+         """
+         # DataFrame: custom ML task (checked first)
+         if isinstance(source, pd.DataFrame):
+             return self._detect_dataframe_task(source)
+
+         # Path or string: directory or file (check before treating as plain text)
+         if isinstance(source, (str, Path)):
+             path = Path(source)
+             if path.is_dir():
+                 return self._detect_from_directory(path)
+             elif path.is_file():
+                 return self._detect_from_file(path)
+             # Non-existent path: a short string without path-like characters
+             # is treated as a question (QA task)
+             elif (
+                 isinstance(source, str)
+                 and len(source) < 500
+                 and not any(c in source for c in ['/', '\\', '.csv', '.json', '.txt'])
+             ):
+                 return self._detect_qa_task(source)
+
+         # Dict: QA task
+         if isinstance(source, dict):
+             return self._detect_qa_task(str(source))
+
+         # Fallback
+         return TaskDetection(
+             task_type="datasci",
+             task_mode="standard_ml",
+             description="Custom data science task",
+             recommended_workflow="aide",
+             confidence=0.5
+         )
+
+     def _detect_qa_task(self, question: str) -> TaskDetection:
+         """Detect a QA (question-answering) task."""
+         self.logger.info("Detected QA task")
+
+         return TaskDetection(
+             task_type="qa",
+             task_mode="standard_ml",
+             description=f"Answer the following question: {question}",
+             io_instructions="Provide a clear, concise answer.",
+             recommended_workflow=WORKFLOW_RECOMMENDATIONS["qa"]["default"],
+             confidence=0.95,
+             metadata={"question": question}
+         )
+
+     def _detect_dataframe_task(self, df: pd.DataFrame) -> TaskDetection:
+         """Detect task type from a pandas DataFrame."""
+         self.logger.info(f"Detected DataFrame task with {len(df)} rows, {len(df.columns)} columns")
+
+         # Check for a target column (common pattern: the last column is the target)
+         has_target = len(df.columns) > 1
+
+         if has_target:
+             description = (
+                 "Build a machine learning model to predict the target variable. "
+                 f"Dataset has {len(df)} rows and {len(df.columns)} columns."
+             )
+             io_instructions = (
+                 "Train a model on the data and provide predictions. "
+                 "Use appropriate evaluation metrics."
+             )
+             recommended_workflow = "aide"
+         else:
+             description = (
+                 "Perform exploratory data analysis and clustering. "
+                 f"Dataset has {len(df)} rows and {len(df.columns)} columns."
+             )
+             io_instructions = "Analyze the data and provide insights."
+             recommended_workflow = "data_interpreter"
+
+         return TaskDetection(
+             task_type="datasci",
+             task_mode="standard_ml" if has_target else "open_ended",
+             description=description,
+             io_instructions=io_instructions,
+             recommended_workflow=recommended_workflow,
+             confidence=0.85,
+             metadata={
+                 "shape": df.shape,
+                 "columns": list(df.columns),
+                 "dtypes": df.dtypes.to_dict()
+             }
+         )
+
+     def _detect_from_directory(self, dir_path: Path) -> TaskDetection:
+         """Detect task type from a directory structure."""
+         self.logger.info(f"Detecting task from directory: {dir_path}")
+
+         # Priority 1: MLE competition structure (prepared/public & prepared/private)
+         if self._is_mle_competition(dir_path):
+             return self._detect_mle_competition(dir_path)
+
+         # Priority 2: Kaggle competition structure (train.csv, test.csv)
+         if self._is_kaggle_format(dir_path):
+             return self._detect_kaggle_competition(dir_path)
+
+         # Priority 3: open-ended task
+         if self._is_open_ended_task(dir_path):
+             return self._detect_open_ended_task(dir_path)
+
+         # Priority 4: DataSci task
+         if self._is_datasci_task(dir_path):
+             return self._detect_datasci_task(dir_path)
+
+         # Default: treat as a generic data directory
+         return self._detect_generic_directory(dir_path)
+
+     def _detect_from_file(self, file_path: Path) -> TaskDetection:
+         """Detect task type from a single file."""
+         self.logger.info(f"Detecting task from file: {file_path}")
+
+         suffix = file_path.suffix.lower()
+
+         if suffix == ".csv":
+             return self._detect_csv_file(file_path)
+         elif suffix in [".json", ".jsonl"]:
+             return self._detect_json_file(file_path)
+         elif suffix in [".txt", ".md"]:
+             return self._detect_text_file(file_path)
+         else:
+             # Unknown file type
+             return TaskDetection(
+                 task_type="datasci",
+                 task_mode="standard_ml",
+                 description=f"Process file: {file_path.name}",
+                 recommended_workflow="aide",
+                 confidence=0.6
+             )
+
+     def _is_mle_competition(self, dir_path: Path) -> bool:
+         """Check for an MLE competition structure (prepared/public & prepared/private)."""
+         prepared = dir_path / "prepared"
+         if prepared.exists():
+             public = prepared / "public"
+             private = prepared / "private"
+             if public.exists() and private.exists():
+                 return True
+         return False
+
+     def _is_kaggle_format(self, dir_path: Path) -> bool:
+         """Check for Kaggle-style files (train.csv, test.csv, sample_submission.csv)."""
+         has_train = (dir_path / "train.csv").exists()
+         has_test = (dir_path / "test.csv").exists()
+         has_sample_submission = (dir_path / "sample_submission.csv").exists()
+
+         return has_train and (has_test or has_sample_submission)
+
+     def _is_open_ended_task(self, dir_path: Path) -> bool:
+         """Check if the directory is an open-ended task."""
+         has_description = (dir_path / "description.md").exists()
+         has_rubric = (dir_path / "rubric.md").exists()
+         return has_description and has_rubric
+
+     def _is_datasci_task(self, dir_path: Path) -> bool:
+         """Check if the directory is a DataSci task."""
+         has_prompt = (dir_path / "prompt.txt").exists()
+         has_description = (dir_path / "description.md").exists()
+         return has_prompt or has_description
+
+     def _detect_mle_competition(self, dir_path: Path) -> TaskDetection:
+         """Detect an MLE competition task (standard DSAT format with prepared/public & prepared/private)."""
+         self.logger.info("Detected MLE competition structure (prepared/public & prepared/private)")
+
+         # Load the description if it exists
+         description = ""
+         description_file = dir_path / "description.md"
+         if description_file.exists():
+             description = description_file.read_text(encoding='utf-8')
+
+         # MLE competitions use the kaggle task_type internally
+         return TaskDetection(
+             task_type="kaggle",
+             task_mode="standard_ml",
+             data_dir=dir_path,
+             description=description or "MLE competition task",
+             io_instructions="Train a model and generate predictions for the test set.",
+             recommended_workflow="aide",
+             confidence=0.95,
+             metadata={"structure": "mle_competition"}
+         )
+
+     def _detect_kaggle_competition(self, dir_path: Path) -> TaskDetection:
+         """Detect a Kaggle competition task."""
+         self.logger.info("Detected Kaggle competition structure")
+
+         # Load the description if it exists
+         description = ""
+         description_file = dir_path / "description.md"
+         if description_file.exists():
+             description = description_file.read_text(encoding='utf-8')
+
+         # Determine whether the competition is tabular or time series
+         is_tabular = self._is_tabular_competition(dir_path)
+         is_time_series = self._is_time_series_competition(dir_path)
+
+         if is_time_series:
+             recommended = WORKFLOW_RECOMMENDATIONS["kaggle_competition"]["time_series"][0]
+         elif is_tabular:
+             recommended = WORKFLOW_RECOMMENDATIONS["kaggle_competition"]["tabular"][0]
+         else:
+             recommended = WORKFLOW_RECOMMENDATIONS["kaggle_competition"]["default"]
+
+         return TaskDetection(
+             task_type="kaggle",
+             task_mode="standard_ml",
+             data_dir=dir_path,
+             description=description or "Kaggle competition task",
+             io_instructions="Train a model and generate predictions for the test set.",
+             recommended_workflow=recommended,
+             confidence=0.9,
+             metadata={"structure": "kaggle_competition"}
+         )
+
+     def _detect_open_ended_task(self, dir_path: Path) -> TaskDetection:
+         """Detect an open-ended exploration task."""
+         self.logger.info("Detected open-ended task")
+
+         description_file = dir_path / "description.md"
+         description = ""
+         if description_file.exists():
+             description = description_file.read_text(encoding='utf-8')
+
+         return TaskDetection(
+             task_type="open_ended",
+             task_mode="open_ended",
+             data_dir=dir_path,
+             description=description or "Open-ended data exploration task",
+             io_instructions="Explore the data and provide comprehensive insights.",
+             recommended_workflow=WORKFLOW_RECOMMENDATIONS["open_ended"]["default"],
+             confidence=0.9,
+             metadata={"structure": "open_ended"}
+         )
+
+     def _detect_datasci_task(self, dir_path: Path) -> TaskDetection:
+         """Detect a DataSci task."""
+         self.logger.info("Detected DataSci task")
+
+         prompt_file = dir_path / "prompt.txt"
+         description_file = dir_path / "description.md"
+
+         description = ""
+         if prompt_file.exists():
+             description = prompt_file.read_text(encoding='utf-8')
+         elif description_file.exists():
+             description = description_file.read_text(encoding='utf-8')
+
+         return TaskDetection(
+             task_type="datasci",
+             task_mode="standard_ml",
+             data_dir=dir_path,
+             description=description or "Data science task",
+             io_instructions="Complete the data science task as described.",
+             recommended_workflow=WORKFLOW_RECOMMENDATIONS["datasci"]["default"],
+             confidence=0.85,
+             metadata={"structure": "datasci"}
+         )
+
+     def _detect_generic_directory(self, dir_path: Path) -> TaskDetection:
+         """Detect task from a generic directory."""
+         self.logger.info("Treating as generic data directory")
+
+         # List files in the directory (subdirectories are excluded from the count)
+         files = list(dir_path.glob("*"))
+         file_names = [f.name for f in files if f.is_file()]
+
+         description = (
+             f"Process data in directory: {dir_path.name}. "
+             f"Contains {len(file_names)} files: {', '.join(file_names[:5])}"
+         )
+
+         return TaskDetection(
+             task_type="datasci",
+             task_mode="standard_ml",
+             data_dir=dir_path,
+             description=description,
+             io_instructions="Analyze the data and provide results.",
+             recommended_workflow="aide",
+             confidence=0.7,
+             metadata={"files": file_names}
+         )
+
+     def _detect_csv_file(self, file_path: Path) -> TaskDetection:
+         """Detect task type from a CSV file."""
+         try:
+             df = pd.read_csv(file_path, nrows=10)
+             return self._detect_dataframe_task(df)
+         except Exception as e:
+             self.logger.warning(f"Failed to read CSV file {file_path}: {e}")
+             return TaskDetection(
+                 task_type="datasci",
+                 task_mode="standard_ml",
+                 description=f"Process CSV file: {file_path.name}",
+                 recommended_workflow="aide",
+                 confidence=0.6
+             )
+
+     def _detect_json_file(self, file_path: Path) -> TaskDetection:
+         """Detect task type from a JSON file."""
+         import json
+
+         try:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 data = json.load(f)
+
+             if isinstance(data, list) and len(data) > 0:
+                 # Likely a dataset
+                 return TaskDetection(
+                     task_type="datasci",
+                     task_mode="standard_ml",
+                     description=f"Process JSON dataset with {len(data)} records",
+                     recommended_workflow="aide",
+                     confidence=0.7
+                 )
+             else:
+                 # Likely a QA task or config
+                 return self._detect_qa_task(str(data))
+         except Exception as e:
+             self.logger.warning(f"Failed to read JSON file {file_path}: {e}")
+             return TaskDetection(
+                 task_type="datasci",
+                 task_mode="standard_ml",
+                 description=f"Process JSON file: {file_path.name}",
+                 recommended_workflow="aide",
+                 confidence=0.6
+             )
+
+     def _detect_text_file(self, file_path: Path) -> TaskDetection:
+         """Detect task type from a text/markdown file."""
+         content = file_path.read_text(encoding='utf-8')
+
+         if len(content) < 1000:
+             # Short text: likely a question or brief description
+             return self._detect_qa_task(content)
+         else:
+             # Long text: likely a full task description
+             return TaskDetection(
+                 task_type="open_ended",
+                 task_mode="open_ended",
+                 description=content,
+                 recommended_workflow="deepanalyze",
+                 confidence=0.75
+             )
+
+     def _is_tabular_competition(self, dir_path: Path) -> bool:
+         """Check if the competition is tabular (has CSV files)."""
+         csv_files = list(dir_path.glob("**/*.csv"))
+         return len(csv_files) > 0
+
+     def _is_time_series_competition(self, dir_path: Path) -> bool:
+         """Check if the competition is time series."""
+         # Heuristic: look for time-related keywords in file/directory names
+         name_lower = str(dir_path).lower()
+         time_keywords = ["time", "temporal", "forecast", "series", "date"]
+         return any(keyword in name_lower for keyword in time_keywords)
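
For orientation, here is a minimal usage sketch for the detector above (the inputs are illustrative and not part of the package; the import path follows the `dslighting/core/task_detector.py` entry in the file list):

```python
import pandas as pd

from dslighting.core.task_detector import TaskDetector

detector = TaskDetector()

# A DataFrame routes to _detect_dataframe_task: frames with more than one
# column are treated as supervised ("standard_ml"), single-column frames
# as open-ended EDA.
df = pd.DataFrame({"feature": [1, 2, 3], "target": [0, 1, 0]})
detection = detector.detect(df)
print(detection.task_type, detection.task_mode)  # datasci standard_ml
print(detection.recommended_workflow)            # aide

# A short string with no path-like characters routes to _detect_qa_task.
qa = detector.detect("What drives churn in this customer base?")
print(qa.task_type, qa.confidence)  # qa 0.95
```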
dslighting/utils/__init__.py
@@ -0,0 +1,19 @@
+ """
+ Utility modules for DSLighting.
+ """
+
+ from dslighting.utils.defaults import (
+     DEFAULT_WORKFLOW,
+     DEFAULT_LLM_MODEL,
+     DEFAULT_TEMPERATURE,
+     DEFAULT_MAX_ITERATIONS,
+     WORKFLOW_RECOMMENDATIONS,
+ )
+
+ __all__ = [
+     "DEFAULT_WORKFLOW",
+     "DEFAULT_LLM_MODEL",
+     "DEFAULT_TEMPERATURE",
+     "DEFAULT_MAX_ITERATIONS",
+     "WORKFLOW_RECOMMENDATIONS",
+ ]
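
Because of these re-exports, the defaults are importable from the shorter `dslighting.utils` path as well; a quick check (expected values taken from `defaults.py` below):

```python
from dslighting.utils import DEFAULT_WORKFLOW, WORKFLOW_RECOMMENDATIONS

print(DEFAULT_WORKFLOW)                                   # aide
print(WORKFLOW_RECOMMENDATIONS["open_ended"]["default"])  # deepanalyze
```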
dslighting/utils/defaults.py
@@ -0,0 +1,151 @@
+ """
+ Default configurations for the DSLighting simplified API.
+
+ This module defines sensible defaults that can be overridden by:
+ 1. User parameters (highest priority)
+ 2. Environment variables
+ 3. These defaults (lowest priority)
+ """
+
+ from typing import Any, Dict
+
+
+ # ============================================================================
+ # LLM Defaults
+ # ============================================================================
+
+ DEFAULT_LLM_MODEL = "gpt-4o-mini"
+ DEFAULT_TEMPERATURE = 0.7
+ DEFAULT_MAX_RETRIES = 3
+ DEFAULT_API_BASE = "https://api.openai.com/v1"
+
+
+ # ============================================================================
+ # Workflow Defaults
+ # ============================================================================
+
+ DEFAULT_WORKFLOW = "aide"
+ DEFAULT_MAX_ITERATIONS = 5
+ DEFAULT_NUM_DRAFTS = 5
+
+
+ # ============================================================================
+ # Sandbox Defaults
+ # ============================================================================
+
+ DEFAULT_SANDBOX_TIMEOUT = 6 * 3600  # 6 hours
+
+
+ # ============================================================================
+ # Workspace Defaults
+ # ============================================================================
+
+ DEFAULT_WORKSPACE_DIR = "./runs/dslighting"
+ DEFAULT_KEEP_WORKSPACE_ON_FAILURE = True
+ DEFAULT_KEEP_ALL_WORKSPACES = False
+
+
+ # ============================================================================
+ # Workflow Recommendations
+ # ============================================================================
+
+ # Workflow recommendations based on task type and data characteristics.
+ #
+ # This mapping helps auto-select the best workflow for a given task.
+ # Users can override it by explicitly specifying a workflow.
+
+ WORKFLOW_RECOMMENDATIONS: Dict[str, Dict[str, Any]] = {
+     "kaggle_competition": {
+         "tabular": ["autokaggle", "aide"],
+         "time_series": ["aide", "automind"],
+         "default": "aide"
+     },
+     "open_ended": {
+         "analysis": ["deepanalyze", "automind"],
+         "modeling": ["aide", "deepanalyze"],
+         "default": "deepanalyze"
+     },
+     "quick_analysis": {
+         "eda": ["data_interpreter"],
+         "debugging": ["data_interpreter"],
+         "default": "data_interpreter"
+     },
+     "qa": {
+         "default": "aide"
+     },
+     "datasci": {
+         "default": "aide"
+     }
+ }
+
+
+ # ============================================================================
+ # Full Default Configuration
+ # ============================================================================
+
+ # Complete default configuration structure.
+ #
+ # This can be merged with user parameters and environment variables
+ # to create the final DSATConfig.
+
+ DEFAULT_CONFIG: Dict[str, Any] = {
+     "llm": {
+         "model": DEFAULT_LLM_MODEL,
+         "temperature": DEFAULT_TEMPERATURE,
+         "max_retries": DEFAULT_MAX_RETRIES,
+         "api_base": DEFAULT_API_BASE,
+         "api_key": None,  # Loaded from the environment
+     },
+     "workflow": {
+         "name": DEFAULT_WORKFLOW,
+         "params": {}
+     },
+     "run": {
+         "name": "dsat_run",  # "dsat_run" lets DSATRunner auto-generate: dsat_run_{task_id}_{uid}
+         "total_steps": DEFAULT_MAX_ITERATIONS,
+         "keep_all_workspaces": DEFAULT_KEEP_ALL_WORKSPACES,
+         "keep_workspace_on_failure": DEFAULT_KEEP_WORKSPACE_ON_FAILURE,
+         "parameters": {},
+     },
+     "sandbox": {
+         "timeout": DEFAULT_SANDBOX_TIMEOUT,
+     },
+     "agent": {
+         "search": {
+             "num_drafts": DEFAULT_NUM_DRAFTS,
+             "max_iterations": DEFAULT_MAX_ITERATIONS,
+             "debug_prob": 0.8,
+             "max_debug_depth": 10,
+         },
+         "max_retries": 10,
+         "autokaggle": {
+             "max_attempts_per_phase": 10,
+             "success_threshold": 3.0,
+         }
+     }
+ }
+
+
+ # ============================================================================
+ # Environment Variable Names
+ # ============================================================================
+
+ # Environment variables that DSLighting reads.
+ #
+ # These can be set in a .env file or exported in the shell.
+
+ ENV_API_KEY = "API_KEY"
+ ENV_API_BASE = "API_BASE"
+ ENV_LLM_MODEL = "LLM_MODEL"
+ ENV_LLM_PROVIDER = "LLM_PROVIDER"
+ ENV_LLM_MODEL_CONFIGS = "LLM_MODEL_CONFIGS"
+ ENV_LLM_TEMPERATURE = "LLM_TEMPERATURE"
+
+ ENV_DSLIGHTING_DEFAULT_WORKFLOW = "DSLIGHTING_DEFAULT_WORKFLOW"
+ ENV_DSLIGHTING_WORKSPACE_DIR = "DSLIGHTING_WORKSPACE_DIR"
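
To make the layering described in the module docstring concrete, here is an illustrative sketch (not part of the package; `resolve_llm_model` is a hypothetical helper) of resolving one setting with the documented precedence, using only names defined above:

```python
import os
from typing import Optional

from dslighting.utils.defaults import DEFAULT_CONFIG, ENV_LLM_MODEL

def resolve_llm_model(user_model: Optional[str] = None) -> str:
    """Hypothetical resolver: user parameter > environment variable > default."""
    # 1. An explicit user parameter has the highest priority.
    if user_model is not None:
        return user_model
    # 2. Next, the environment variable named above (LLM_MODEL).
    env_model = os.environ.get(ENV_LLM_MODEL)
    if env_model:
        return env_model
    # 3. Finally, fall back to the packaged default.
    return DEFAULT_CONFIG["llm"]["model"]

print(resolve_llm_model())                     # gpt-4o-mini (when nothing else is set)
print(resolve_llm_model(user_model="gpt-4o"))  # gpt-4o
```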