dslighting-1.3.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsat/__init__.py +3 -0
- dsat/benchmark/__init__.py +1 -0
- dsat/benchmark/benchmark.py +168 -0
- dsat/benchmark/datasci.py +291 -0
- dsat/benchmark/mle.py +777 -0
- dsat/benchmark/sciencebench.py +304 -0
- dsat/common/__init__.py +0 -0
- dsat/common/constants.py +11 -0
- dsat/common/exceptions.py +48 -0
- dsat/common/typing.py +19 -0
- dsat/config.py +79 -0
- dsat/models/__init__.py +3 -0
- dsat/models/candidates.py +16 -0
- dsat/models/formats.py +52 -0
- dsat/models/task.py +64 -0
- dsat/operators/__init__.py +0 -0
- dsat/operators/aflow_ops.py +90 -0
- dsat/operators/autokaggle_ops.py +170 -0
- dsat/operators/automind_ops.py +38 -0
- dsat/operators/base.py +22 -0
- dsat/operators/code.py +45 -0
- dsat/operators/dsagent_ops.py +123 -0
- dsat/operators/llm_basic.py +84 -0
- dsat/prompts/__init__.py +0 -0
- dsat/prompts/aflow_prompt.py +76 -0
- dsat/prompts/aide_prompt.py +52 -0
- dsat/prompts/autokaggle_prompt.py +290 -0
- dsat/prompts/automind_prompt.py +29 -0
- dsat/prompts/common.py +51 -0
- dsat/prompts/data_interpreter_prompt.py +82 -0
- dsat/prompts/dsagent_prompt.py +88 -0
- dsat/runner.py +554 -0
- dsat/services/__init__.py +0 -0
- dsat/services/data_analyzer.py +387 -0
- dsat/services/llm.py +486 -0
- dsat/services/llm_single.py +421 -0
- dsat/services/sandbox.py +386 -0
- dsat/services/states/__init__.py +0 -0
- dsat/services/states/autokaggle_state.py +43 -0
- dsat/services/states/base.py +14 -0
- dsat/services/states/dsa_log.py +13 -0
- dsat/services/states/experience.py +237 -0
- dsat/services/states/journal.py +153 -0
- dsat/services/states/operator_library.py +290 -0
- dsat/services/vdb.py +76 -0
- dsat/services/workspace.py +178 -0
- dsat/tasks/__init__.py +3 -0
- dsat/tasks/handlers.py +376 -0
- dsat/templates/open_ended/grade_template.py +107 -0
- dsat/tools/__init__.py +4 -0
- dsat/utils/__init__.py +0 -0
- dsat/utils/context.py +172 -0
- dsat/utils/dynamic_import.py +71 -0
- dsat/utils/parsing.py +33 -0
- dsat/workflows/__init__.py +12 -0
- dsat/workflows/base.py +53 -0
- dsat/workflows/factory.py +439 -0
- dsat/workflows/manual/__init__.py +0 -0
- dsat/workflows/manual/autokaggle_workflow.py +148 -0
- dsat/workflows/manual/data_interpreter_workflow.py +153 -0
- dsat/workflows/manual/deepanalyze_workflow.py +484 -0
- dsat/workflows/manual/dsagent_workflow.py +76 -0
- dsat/workflows/search/__init__.py +0 -0
- dsat/workflows/search/aflow_workflow.py +344 -0
- dsat/workflows/search/aide_workflow.py +283 -0
- dsat/workflows/search/automind_workflow.py +237 -0
- dsat/workflows/templates/__init__.py +0 -0
- dsat/workflows/templates/basic_kaggle_loop.py +71 -0
- dslighting/__init__.py +170 -0
- dslighting/core/__init__.py +13 -0
- dslighting/core/agent.py +646 -0
- dslighting/core/config_builder.py +318 -0
- dslighting/core/data_loader.py +422 -0
- dslighting/core/task_detector.py +422 -0
- dslighting/utils/__init__.py +19 -0
- dslighting/utils/defaults.py +151 -0
- dslighting-1.3.9.dist-info/METADATA +554 -0
- dslighting-1.3.9.dist-info/RECORD +80 -0
- dslighting-1.3.9.dist-info/WHEEL +5 -0
- dslighting-1.3.9.dist-info/top_level.txt +2 -0
@@ -0,0 +1,76 @@
+# dsat/workflows/manual/dsagent_workflow.py
+
+import logging
+import difflib
+from pathlib import Path
+from typing import Dict, Any
+
+from dsat.workflows.base import DSATWorkflow
+from dsat.services.states.dsa_log import DSAgentState
+
+from dsat.services.sandbox import SandboxService
+from dsat.operators.base import Operator
+
+
+logger = logging.getLogger(__name__)
+
+
+class DSAgentWorkflow(DSATWorkflow):
+    """
+    Implements the core algorithmic loop of DS-Agent: Plan -> Execute -> Log.
+    This workflow now conforms to the DSATWorkflow interface.
+    """
+    def __init__(self, operators: Dict[str, Operator], services: Dict[str, Any], agent_config: Dict[str, Any]):
+        super().__init__(operators, services, agent_config)
+        self.state: DSAgentState = services["state"]
+        self.sandbox_service: SandboxService = services["sandbox"]
+        self.planner_op = self.operators["planner"]
+        self.executor_op = self.operators["executor"]
+        self.logger_op = self.operators["logger"]
+
+    async def solve(self, description: str, io_instructions: str, data_dir: Path, output_path: Path) -> None:
+        """
+        Use DS-Agent's Plan-Execute-Log loop...
+        """
+        logger.info(f"DSAgentWorkflow starting to solve task. Target output: {output_path}")
+
+        self.state.running_log = "[Initial State] Starting analysis."
+        self.state.final_code = "# Basic Initialization. Analyze the data report and I/O requirements."
+
+        task_goal = description
+        current_io_instructions = io_instructions
+
+        max_iterations = self.agent_config.get("max_iterations", 2)
+
+        for step in range(max_iterations):
+            logger.info(f"--- Starting DS-Agent Solve Step {step + 1}/{max_iterations} ---")
+
+            # 1. Plan
+            plan = await self.planner_op(research_problem=task_goal, io_instructions=current_io_instructions, running_log=self.state.running_log)
+
+            # 2. Execute (refine the code)
+            initial_code = self.state.final_code
+            exec_result, refined_code = await self.executor_op(
+                initial_code=initial_code, plan=plan,
+                research_problem=task_goal, io_instructions=current_io_instructions, running_log=self.state.running_log
+            )
+
+            diff = "".join(difflib.unified_diff(
+                initial_code.splitlines(keepends=True),
+                refined_code.splitlines(keepends=True),
+            ))
+
+            # 3. Log
+            new_log_entry = await self.logger_op(running_log=self.state.running_log, plan=plan, exec_result=exec_result, diff=diff)
+
+            self.state.running_log = new_log_entry
+
+            self.state.final_code = refined_code
+            logger.info(f"Step {step + 1} complete. Code has been refined.")
+
+        logger.info("Max iterations reached. Executing the final refined code to produce the output file...")
+        final_exec_result = self.sandbox_service.run_script(self.state.final_code)
+
+        if not final_exec_result.success:
+            logger.error(f"Final code execution failed!\n{final_exec_result.stderr}")
+
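For context on how this Plan -> Execute -> Log loop is meant to be driven, the sketch below wires DSAgentWorkflow to stand-in operators and services. It is a minimal, hypothetical harness: the StubOperator, StubState, and StubSandbox classes are illustrative assumptions and are not part of the dslighting/dsat package; only the "planner"/"executor"/"logger" operator keys and the "state"/"sandbox" service keys come from the code above.

# Hypothetical driver for DSAgentWorkflow (sketch only; the stub classes below
# are illustrative assumptions, not part of the dslighting/dsat package).
import asyncio
from pathlib import Path

from dsat.workflows.manual.dsagent_workflow import DSAgentWorkflow


class StubOperator:
    """Stands in for the planner/executor/logger LLM operators."""
    def __init__(self, name: str):
        self.name = name

    async def __call__(self, **kwargs):
        if self.name == "executor":
            # Matches the executor contract above: (exec_result, refined_code).
            return "ok", "print('refined solution')"
        return f"{self.name} output"


class StubState:
    """Minimal stand-in for DSAgentState: holds the running log and code."""
    running_log = ""
    final_code = ""


class StubSandbox:
    """Minimal stand-in for SandboxService.run_script()."""
    def run_script(self, code: str):
        return type("Result", (), {"success": True, "stderr": ""})()


async def main() -> None:
    workflow = DSAgentWorkflow(
        operators={"planner": StubOperator("planner"),
                   "executor": StubOperator("executor"),
                   "logger": StubOperator("logger")},
        services={"state": StubState(), "sandbox": StubSandbox()},
        agent_config={"max_iterations": 2},
    )
    await workflow.solve(
        description="Predict the target column.",
        io_instructions="Write predictions to submission.csv.",
        data_dir=Path("./data"),
        output_path=Path("./submission.csv"),
    )


if __name__ == "__main__":
    asyncio.run(main())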
File without changes
@@ -0,0 +1,344 @@
+# dsat/workflows/search/aflow_workflow.py
+
+import uuid
+import contextlib
+import logging
+import asyncio
+from pathlib import Path
+from typing import Dict, Any, Optional, Callable, Coroutine, Tuple
+import time
+from datetime import datetime, timezone
+import shutil  # Import shutil
+
+from dsat.services.states.experience import Experience
+from dsat.services.llm import LLMService
+from dsat.models.candidates import WorkflowCandidate
+from dsat.prompts.aflow_prompt import get_graph_optimize_prompt, GraphOptimize
+from dsat.workflows.templates.basic_kaggle_loop import get_initial_workflow_code
+from dsat.operators.aflow_ops import ScEnsembleOperator, ReviewOperator, ReviseOperator
+from dsat.utils.dynamic_import import import_workflow_from_string
+from dsat.benchmark.benchmark import BaseBenchmark
+from dsat.services.data_analyzer import DataAnalyzer
+from dsat.common.exceptions import DynamicImportError  # Import DynamicImportError
+try:
+    # Preferred when run via run_benchmark.py (module aliases).
+    from mlebench.utils import get_repo_dir, import_fn, load_answers, load_yaml, read_csv
+except ModuleNotFoundError:
+    try:
+        # Allows importing DSAT modules without relying on runtime aliasing.
+        from benchmarks.mlebench.utils import get_repo_dir, import_fn, load_answers, load_yaml, read_csv
+    except ModuleNotFoundError:
+        # mlebench not available in standalone package - define placeholders
+        def get_repo_dir():
+            from pathlib import Path
+            return Path.cwd()
+        def import_fn(*args, **kwargs):
+            return None
+        def load_answers(*args, **kwargs):
+            return {}
+        def load_yaml(*args, **kwargs):
+            return {}
+        def read_csv(*args, **kwargs):
+            return None
+
+logger = logging.getLogger(__name__)
+
+class AFlowWorkflow:
+    """
+    AFlow workflow implements meta-optimization. It orchestrates an evolutionary
+    search to find the best workflow for a given task, using a benchmark's
+    validation set for fitness.
+
+    This class is now a dedicated optimizer, not a DSATWorkflow.
+    Its primary method is `optimize()`, which returns the best found workflow code.
+    """
+
+    def __init__(self, operators: Dict[str, Any], services: Dict[str, Any], agent_config: Dict[str, Any], benchmark: Optional[BaseBenchmark] = None):
+        # NOTE: It does not call super().__init__ as it's not a DSATWorkflow.
+        self.llm_service: LLMService = services["llm"]
+        self.workspace = services["workspace"]
+        self.sandbox_service = services["sandbox"]
+        self.experience = Experience(self.workspace)
+        self.benchmark = benchmark
+
+        self.aflow_operators = operators if operators else {
+            "ScEnsemble": ScEnsembleOperator(llm_service=self.llm_service),
+            "Review": ReviewOperator(llm_service=self.llm_service),
+            "Revise": ReviseOperator(llm_service=self.llm_service)
+        }
+
+        optimizer_config = agent_config.get("optimizer", {})
+        self.max_rounds = optimizer_config.get("max_rounds", 4)
+        self.validation_runs_per_candidate = optimizer_config.get("validation_runs_per_candidate", 2)
+        self.top_k_selection = optimizer_config.get("top_k_selection", 3)
+
+    def _resolve_competition_dir(self, competition_id: str) -> Path:
+        """Resolve the competition directory without importing preparers."""
+        repo_dir = get_repo_dir()
+        dabench_root = repo_dir / "benchmarks" / "dabench" / "competitions"
+        sciencebench_root = repo_dir / "benchmarks" / "sciencebench" / "competitions"
+        legacy_root = repo_dir / "benchmarks" / "mlebench" / "competitions"
+
+        if competition_id.startswith("sciencebench-") and (sciencebench_root / competition_id).exists():
+            return sciencebench_root / competition_id
+        if competition_id.startswith("dabench-") and (dabench_root / competition_id).exists():
+            return dabench_root / competition_id
+        if (legacy_root / competition_id).exists():
+            return legacy_root / competition_id
+        if (dabench_root / competition_id).exists():
+            return dabench_root / competition_id
+        return legacy_root / competition_id
+
+    def _get_prepared_dirs(self, competition_id: str) -> tuple[Path, Path]:
+        """
+        Prefer prepared/public_val & prepared/private_val if they exist,
+        otherwise fall back to prepared/public & prepared/private.
+        This avoids any dependency on prepare.py/prepare_val.py.
+        """
+        base_data_dir = self.benchmark.registry.get_data_dir()  # type: ignore[union-attr]
+        public_val = base_data_dir / competition_id / "prepared" / "public_val"
+        private_val = base_data_dir / competition_id / "prepared" / "private_val"
+        public = base_data_dir / competition_id / "prepared" / "public"
+        private = base_data_dir / competition_id / "prepared" / "private"
+        return (
+            public_val if public_val.exists() else public,
+            private_val if private_val.exists() else private,
+        )
+
+    def _load_competition_description(self, competition_id: str) -> str:
+        """Load description from competition folder or legacy config path."""
+        comp_dir = self._resolve_competition_dir(competition_id)
+        desc_path = comp_dir / "description.md"
+        if desc_path.exists():
+            return desc_path.read_text()
+        config = load_yaml(comp_dir / "config.yaml")
+        legacy_desc = get_repo_dir() / config["description"]
+        if not legacy_desc.exists() and str(config["description"]).startswith("mlebench/"):
+            legacy_desc = get_repo_dir() / "benchmarks" / config["description"]
+        return legacy_desc.read_text()
+
+    def _grade_dabench_without_preparer(self, submission_path: Path, competition_id: str) -> float:
+        """Grade a DABench submission using existing prepared answers and local grade.py."""
+        comp_dir = self._resolve_competition_dir(competition_id)
+        config = load_yaml(comp_dir / "config.yaml")
+
+        # Resolve grade function from file, regardless of legacy import strings.
+        grade_import = config["grader"]["grade_fn"]
+        module_str, fn_name = grade_import.split(":")
+        leaf = module_str.split(".")[-1]  # usually "grade"
+        grade_file = comp_dir / f"{leaf}.py"
+        grade_fn = import_fn(f"file:{grade_file}:{fn_name}")
+
+        # Resolve answers path with val->test fallback.
+        _, private_dir = self._get_prepared_dirs(competition_id)
+        answers_rel = config["dataset"]["answers"]
+        answers_path = self.benchmark.registry.get_data_dir() / answers_rel  # type: ignore[union-attr]
+        if private_dir.name.endswith("_val") and "/private/" in str(answers_rel):
+            answers_path = Path(str(answers_path).replace("/private/", "/private_val/"))
+
+        submission_df = read_csv(submission_path)
+        answers = load_answers(answers_path)
+        score = grade_fn(submission_df, answers)
+        return float(score) if score is not None else 0.0
+
+    async def optimize(self) -> str:
+        """
+        Drives the entire meta-optimization process and returns the best workflow code.
+        """
+        meta_started_at = datetime.now(timezone.utc)
+        meta_perf_start = time.perf_counter()
+        usage_before = self.llm_service.get_usage_summary()
+        best_workflow_code = ""
+
+        if not self.benchmark or not hasattr(self.benchmark, 'set_mode') or not hasattr(self.benchmark, 'grade'):
+            raise NotImplementedError(
+                f"AFlow requires a compatible benchmark with `set_mode` and `grade` methods. "
+                f"'{type(self.benchmark).__name__}' is not compatible."
+            )
+
+        if not self.benchmark.problems:
+            raise ValueError(f"No problems found for benchmark '{self.benchmark.name}'. AFlow cannot proceed.")
+
+        logger.info("AFlow starting meta-optimization...")
+
+        # Set benchmark to 'validation' mode for the optimization phase.
+        self.benchmark.set_mode('validation')
+        logger.info("Benchmark set to 'validation' mode for optimization.")
+
+        try:
+            best_workflow_code = await self._run_optimization_loop()
+            return best_workflow_code
+        finally:
+            meta_ended_at = datetime.now(timezone.utc)
+            duration_seconds = round(time.perf_counter() - meta_perf_start, 4)
+            usage_after = self.llm_service.get_usage_summary()
+            meta_usage_delta = {
+                "prompt_tokens": usage_after.get("prompt_tokens", 0) - usage_before.get("prompt_tokens", 0),
+                "completion_tokens": usage_after.get("completion_tokens", 0) - usage_before.get("completion_tokens", 0),
+                "total_tokens": usage_after.get("total_tokens", 0) - usage_before.get("total_tokens", 0),
+                "total_cost": round(float(usage_after.get("total_cost", 0.0) - usage_before.get("total_cost", 0.0)), 12),
+                "call_count": usage_after.get("call_count", 0) - usage_before.get("call_count", 0),
+            }
+            self.experience.record_score(
+                -1,
+                0.0,
+                best_workflow_code or "",
+                score_type="meta_summary",
+                extra={
+                    "started_at": meta_started_at.isoformat().replace("+00:00", "Z"),
+                    "ended_at": meta_ended_at.isoformat().replace("+00:00", "Z"),
+                    "duration_seconds": duration_seconds,
+                    "usage_delta": meta_usage_delta,
+                    "usage_total": usage_after,
+                },
+            )
+
+    async def _run_optimization_loop(self) -> str:
+        """Manages the evolutionary loop to find the best workflow."""
+        initial_workflow_code = get_initial_workflow_code()
+
+        # Get a representative problem to use for evaluation during optimization.
+        # The original logic used only the first problem, we replicate that here.
+        representative_problem = self.benchmark.problems[0]
+
+        # We no longer generate a full data report here. The optimizer prompt will
+        # now be more generic and focus on workflow logic, not specific filenames.
+        # This prevents the optimizer from learning to hardcode "submission.csv".
+        logger.info("Starting optimization loop without a pre-generated, instance-specific data report.")
+
+        initial_fitness = await self._evaluate_workflow(initial_workflow_code, representative_problem)
+        self.experience.record_score(0, initial_fitness, initial_workflow_code)
+        logger.info(f"Initial workflow fitness: {initial_fitness:.4f}")
+
+        best_workflow_code = initial_workflow_code
+        best_fitness = initial_fitness
+
+        for round_num in range(1, self.max_rounds):
+            logger.info(f"--- AFlow Optimization Round {round_num}/{self.max_rounds-1} ---")
+            parent_candidate = self.experience.select_parent_candidate(self.top_k_selection)
+            if not parent_candidate:
+                parent_candidate = WorkflowCandidate(
+                    workflow_code=initial_workflow_code,
+                    fitness=initial_fitness,
+                    round_num=0
+                )
+
+            try:
+                optimized_code, modification = await self._optimize_workflow(
+                    parent_candidate.workflow_code,
+                    parent_candidate.fitness or 0.0,
+                    parent_candidate.round_num
+                )
+
+                new_fitness = await self._evaluate_workflow(optimized_code, representative_problem)
+
+                parent_round = parent_candidate.round_num if parent_candidate.round_num is not None else 0
+                self.experience.record_score(round_num, new_fitness, optimized_code)
+                self.experience.record_experience(parent_round, round_num, modification, parent_candidate.fitness or 0.0, new_fitness)
+
+                logger.info(f"Round {round_num}: {modification} -> fitness: {new_fitness:.4f}")
+
+                if new_fitness > best_fitness:
+                    best_workflow_code = optimized_code
+                    best_fitness = new_fitness
+                    logger.info(f"New best workflow found with fitness: {best_fitness:.4f}")
+            except Exception as e:
+                logger.error(f"Error in optimization round {round_num}: {e}", exc_info=True)
+
+        return best_workflow_code
+
+    async def _optimize_workflow(self, parent_code: str, parent_score: float, parent_round_num: Optional[int]) -> tuple[str, str]:
+        """Generates an optimized workflow using an LLM."""
+        experience_str = self.experience.get_experience_summary(parent_round_num)
+
+        optimize_prompt = get_graph_optimize_prompt(
+            experience=experience_str,
+            score=parent_score,
+            graph_code=parent_code,
+        )
+
+        response = await self.llm_service.call_with_json(optimize_prompt, output_model=GraphOptimize)
+        return response.graph, response.modification
+
+    async def _evaluate_workflow(self, workflow_code: str, problem: Dict) -> float:
+        """
+        Evaluates a single candidate workflow on a single representative problem
+        and returns its fitness score. This is the core fitness function.
+        """
+        scores = []
+
+        # Get description and data_dir from the representative problem.
+        # This assumes a Kaggle-like task handled by mle.py.
+        competition_id = problem.get("competition_id")
+        if not competition_id:
+            raise ValueError("Representative problem for AFlow must have a 'competition_id'.")
+
+        # Avoid registry.get_competition here to prevent importing prepare/prepare_val.
+        raw_description = self._load_competition_description(competition_id)
+        public_dir, _ = self._get_prepared_dirs(competition_id)
+        data_dir = public_dir.absolute()
+
+        analyzer = DataAnalyzer()
+
+        # 1. Perform static analysis only ONCE.
+        base_report = analyzer.analyze_data(data_dir, task_type="kaggle")
+
+        for i in range(self.validation_runs_per_candidate):
+            unique_id = uuid.uuid4().hex[:6]
+            temp_output_filename = f"validation_submission_{i}_{unique_id}.csv"
+            temp_output_path = self.workspace.get_path("artifacts") / temp_output_filename
+
+            try:
+                io_instructions = analyzer.generate_io_instructions(temp_output_path.name, optimization_context=False)
+
+                # 3. Combine the raw description and cached base report. (IO instructions are passed separately now)
+                description = f"{raw_description}\n{base_report}"
+
+                # 4. Setup the environment
+                self.workspace.link_data_to_workspace(data_dir)
+
+
+                # 5. Import the workflow, handling potential errors
+                try:
+                    workflow_class = import_workflow_from_string(workflow_code)
+                except DynamicImportError as e:
+                    logger.warning(f"Workflow evaluation run {i+1} failed due to invalid code: {e}")
+                    scores.append(0.0)
+                    continue
+
+                # The dynamically created workflow requires these services.
+                instance_services = {
+                    "llm": self.llm_service,
+                    "sandbox": self.sandbox_service,
+                    "workspace": self.workspace
+                }
+                # It also needs a set of operators to choose from.
+                instance = workflow_class(operators=self.aflow_operators, services=instance_services, agent_config={})
+
+                await instance.solve(description, io_instructions, data_dir, temp_output_path)
+
+                sandbox_workdir = self.workspace.get_path("sandbox_workdir")
+                generated_file = sandbox_workdir / temp_output_path.name
+
+                if generated_file.exists():
+                    # ... (copy logic)
+                    if generated_file.resolve() != temp_output_path.resolve():
+                        shutil.copy(generated_file, temp_output_path)
+
+                # Grade without depending on preparers for DABench tasks.
+                if competition_id.startswith("dabench-"):
+                    score = self._grade_dabench_without_preparer(temp_output_path, competition_id)
+                else:
+                    score = await self.benchmark.grade(temp_output_path)
+                scores.append(score)
+
+            except Exception as e:
+                logger.warning(f"Workflow evaluation run {i+1} failed: {e}", exc_info=False)
+                scores.append(0.0)
+
+            finally:
+                if temp_output_path.exists():
+                    with contextlib.suppress(OSError):
+                        temp_output_path.unlink()
+        return sum(scores) / len(scores) if scores else 0.0
+
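The optimize() entry point above only interacts with the benchmark through a narrow surface: set_mode(), grade(), problems, name, and (for the prepared-data and DABench grading paths) registry.get_data_dir(). The stub below sketches that implied interface; it is an assumption reconstructed from the calls in this file, not the actual BaseBenchmark defined in dsat/benchmark/benchmark.py, which may expose more.

# Hypothetical stand-in for a benchmark compatible with AFlowWorkflow.optimize(),
# inferred from the attribute checks and calls above. Illustrative sketch only.
from pathlib import Path
from typing import Any, Dict, List


class StubRegistry:
    def __init__(self, data_dir: Path):
        self._data_dir = data_dir

    def get_data_dir(self) -> Path:
        # Used by _get_prepared_dirs() and _grade_dabench_without_preparer().
        return self._data_dir


class StubBenchmark:
    name = "stub-benchmark"

    def __init__(self, data_dir: Path, competition_id: str):
        self.registry = StubRegistry(data_dir)
        # _run_optimization_loop() only ever reads problems[0]["competition_id"].
        self.problems: List[Dict[str, Any]] = [{"competition_id": competition_id}]
        self.mode = "test"

    def set_mode(self, mode: str) -> None:
        # optimize() switches this to 'validation' before the search loop.
        self.mode = mode

    async def grade(self, submission_path: Path) -> float:
        # A real benchmark scores the submission file; return a fixed placeholder here.
        return 0.0

With real llm, workspace, and sandbox services from dsat, AFlowWorkflow(operators={}, services=services, agent_config={}, benchmark=StubBenchmark(...)).optimize() would then be expected to exercise the evolutionary loop end to end; an empty operators dict falls back to the default ScEnsemble/Review/Revise set shown in __init__.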