dslighting 1.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. dsat/__init__.py +3 -0
  2. dsat/benchmark/__init__.py +1 -0
  3. dsat/benchmark/benchmark.py +168 -0
  4. dsat/benchmark/datasci.py +291 -0
  5. dsat/benchmark/mle.py +777 -0
  6. dsat/benchmark/sciencebench.py +304 -0
  7. dsat/common/__init__.py +0 -0
  8. dsat/common/constants.py +11 -0
  9. dsat/common/exceptions.py +48 -0
  10. dsat/common/typing.py +19 -0
  11. dsat/config.py +79 -0
  12. dsat/models/__init__.py +3 -0
  13. dsat/models/candidates.py +16 -0
  14. dsat/models/formats.py +52 -0
  15. dsat/models/task.py +64 -0
  16. dsat/operators/__init__.py +0 -0
  17. dsat/operators/aflow_ops.py +90 -0
  18. dsat/operators/autokaggle_ops.py +170 -0
  19. dsat/operators/automind_ops.py +38 -0
  20. dsat/operators/base.py +22 -0
  21. dsat/operators/code.py +45 -0
  22. dsat/operators/dsagent_ops.py +123 -0
  23. dsat/operators/llm_basic.py +84 -0
  24. dsat/prompts/__init__.py +0 -0
  25. dsat/prompts/aflow_prompt.py +76 -0
  26. dsat/prompts/aide_prompt.py +52 -0
  27. dsat/prompts/autokaggle_prompt.py +290 -0
  28. dsat/prompts/automind_prompt.py +29 -0
  29. dsat/prompts/common.py +51 -0
  30. dsat/prompts/data_interpreter_prompt.py +82 -0
  31. dsat/prompts/dsagent_prompt.py +88 -0
  32. dsat/runner.py +554 -0
  33. dsat/services/__init__.py +0 -0
  34. dsat/services/data_analyzer.py +387 -0
  35. dsat/services/llm.py +486 -0
  36. dsat/services/llm_single.py +421 -0
  37. dsat/services/sandbox.py +386 -0
  38. dsat/services/states/__init__.py +0 -0
  39. dsat/services/states/autokaggle_state.py +43 -0
  40. dsat/services/states/base.py +14 -0
  41. dsat/services/states/dsa_log.py +13 -0
  42. dsat/services/states/experience.py +237 -0
  43. dsat/services/states/journal.py +153 -0
  44. dsat/services/states/operator_library.py +290 -0
  45. dsat/services/vdb.py +76 -0
  46. dsat/services/workspace.py +178 -0
  47. dsat/tasks/__init__.py +3 -0
  48. dsat/tasks/handlers.py +376 -0
  49. dsat/templates/open_ended/grade_template.py +107 -0
  50. dsat/tools/__init__.py +4 -0
  51. dsat/utils/__init__.py +0 -0
  52. dsat/utils/context.py +172 -0
  53. dsat/utils/dynamic_import.py +71 -0
  54. dsat/utils/parsing.py +33 -0
  55. dsat/workflows/__init__.py +12 -0
  56. dsat/workflows/base.py +53 -0
  57. dsat/workflows/factory.py +439 -0
  58. dsat/workflows/manual/__init__.py +0 -0
  59. dsat/workflows/manual/autokaggle_workflow.py +148 -0
  60. dsat/workflows/manual/data_interpreter_workflow.py +153 -0
  61. dsat/workflows/manual/deepanalyze_workflow.py +484 -0
  62. dsat/workflows/manual/dsagent_workflow.py +76 -0
  63. dsat/workflows/search/__init__.py +0 -0
  64. dsat/workflows/search/aflow_workflow.py +344 -0
  65. dsat/workflows/search/aide_workflow.py +283 -0
  66. dsat/workflows/search/automind_workflow.py +237 -0
  67. dsat/workflows/templates/__init__.py +0 -0
  68. dsat/workflows/templates/basic_kaggle_loop.py +71 -0
  69. dslighting/__init__.py +170 -0
  70. dslighting/core/__init__.py +13 -0
  71. dslighting/core/agent.py +646 -0
  72. dslighting/core/config_builder.py +318 -0
  73. dslighting/core/data_loader.py +422 -0
  74. dslighting/core/task_detector.py +422 -0
  75. dslighting/utils/__init__.py +19 -0
  76. dslighting/utils/defaults.py +151 -0
  77. dslighting-1.3.9.dist-info/METADATA +554 -0
  78. dslighting-1.3.9.dist-info/RECORD +80 -0
  79. dslighting-1.3.9.dist-info/WHEEL +5 -0
  80. dslighting-1.3.9.dist-info/top_level.txt +2 -0
@@ -0,0 +1,237 @@
1
+ """
2
+ Implements Experience, which manages the state of a meta-optimization process.
3
+ This is the core state representation for Paradigm 3 (AFlow-style) evolutionary search.
4
+ """
5
+ import json
6
+ import logging
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+ from typing import Optional, List, Any, Dict, Tuple
10
+
11
+ import numpy as np
12
+
13
+ from dsat.models.candidates import WorkflowCandidate
14
+ from dsat.services.states.base import State
15
+ from dsat.services.workspace import WorkspaceService
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
class Experience(State):
    """
    Acts as the database for the meta-optimizer. It saves and loads
    workflow scores and modification history to guide the search process,
    persisting state to the filesystem within the run's workspace.

    Files used (all resolved through the workspace service):
      * ``state/scores.jsonl``    - one JSON object per recorded score.
      * ``state/experience.json`` - modification outcomes keyed by parent round.
      * ``candidates/``           - the workflow source written for each round.
    """

    def __init__(self, workspace: WorkspaceService):
        """Resolve the state paths and create the state files when missing."""
        self.workspace = workspace
        # Define paths within the managed workspace
        self.scores_file = workspace.get_path("state") / "scores.jsonl"
        self.experience_file = workspace.get_path("state") / "experience.json"
        self.candidates_dir = workspace.get_path("candidates")

        # Initialize state files if they don't exist
        self.scores_file.touch()
        if not self.experience_file.exists():
            with open(self.experience_file, "w", encoding="utf-8") as f:
                json.dump({}, f)

    def _load_all_candidates(self) -> List[WorkflowCandidate]:
        """
        Loads all recorded candidates from the scores file.

        Only entries whose ``score_type`` is ``"fitness"`` or ``"fine"`` (or is
        absent) are considered, and only when the referenced code file still
        exists. Malformed lines are skipped with a warning; blank lines are
        skipped silently.
        """
        if not self.scores_file.exists() or self.scores_file.stat().st_size == 0:
            return []

        candidates: List[WorkflowCandidate] = []
        with open(self.scores_file, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                    if not isinstance(data, dict):
                        # A bare list/number/string is valid JSON but not a score record.
                        raise TypeError("expected a JSON object")
                    if data.get("score_type", "fitness") not in {"fitness", "fine"}:
                        continue
                    code_path = Path(data["code_path"])
                    fitness = data["fitness"]
                except (json.JSONDecodeError, KeyError, TypeError) as e:
                    logger.warning(f"Skipping malformed line in scores.jsonl: {e}")
                    continue

                if not code_path.exists():
                    continue
                with open(code_path, "r", encoding="utf-8") as code_file:
                    code = code_file.read()
                candidates.append(
                    WorkflowCandidate(
                        workflow_code=code,
                        fitness=fitness,
                        round_num=data.get("round"),
                    )
                )
        return candidates

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert a stored score to float, falling back to 0.0 when it is missing or not numeric."""
        try:
            return float(value or 0.0)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    @staticmethod
    def _delta_suffix(mod: dict) -> str:
        """Return the formatted ``, Δ=...`` suffix for a modification, or ``""`` when no numeric delta is stored."""
        delta = mod.get("delta")
        if isinstance(delta, (int, float)):
            return f", Δ={float(delta):+.4f}"
        return ""

    def get_experience_summary(self, parent_round_num: Optional[int]) -> str:
        """
        Loads and formats the experience log for a specific parent candidate.

        The summary lists the successful and failed modifications recorded for
        ``parent_round_num`` followed by the five highest-scoring successful
        modifications across all parents. ``None`` is treated as round ``-1``
        ("no specific parent selected").

        Returns a short explanatory message instead of a summary when the log
        file is missing or does not contain a JSON object.
        """
        if parent_round_num is None:
            parent_round_num = -1

        if not self.experience_file.exists():
            return "Experience log not found."

        with open(self.experience_file, "r", encoding="utf-8") as f:
            try:
                all_experience = json.load(f)
            except json.JSONDecodeError:
                return "Could not parse experience log."

        if not isinstance(all_experience, dict):
            return "Could not parse experience log."

        def _coerce_list(value: Any) -> list[dict]:
            # Keep only dict items; anything else in the log is ignored.
            if not isinstance(value, list):
                return []
            return [item for item in value if isinstance(item, dict)]

        def _parent_section(mods: list[dict]) -> list[str]:
            # Success and failure sections share the exact same line format.
            if not mods:
                return ["- (none yet)"]
            lines = []
            for mod in mods:
                child = mod.get("child_round")
                score_after = self._coerce_score(mod.get("score_after"))
                lines.append(
                    f"- (Child Round {child}, New Score: {score_after:.4f}{self._delta_suffix(mod)}) "
                    f"{mod.get('modification', '')}"
                )
            return lines

        summary_lines = []
        if parent_round_num >= 0:
            summary_lines.append(f"History of modifications for parent from round {parent_round_num}:")
        else:
            summary_lines.append("History of modifications (no specific parent selected):")

        parent_exp = all_experience.get(str(parent_round_num))
        if not isinstance(parent_exp, dict):
            parent_exp = {}

        summary_lines.append("\n### Successful Modifications:")
        summary_lines.extend(_parent_section(_coerce_list(parent_exp.get("success"))))

        summary_lines.append("\n### Failed Modifications:")
        summary_lines.extend(_parent_section(_coerce_list(parent_exp.get("failure"))))

        # Add global successful examples to give the optimizer concrete positive patterns,
        # even when the selected parent has no successes yet.
        global_success: list[Tuple[float, str, dict]] = []
        for pkey, pexp in all_experience.items():
            if not isinstance(pexp, dict):
                continue
            for mod in _coerce_list(pexp.get("success")):
                global_success.append((self._coerce_score(mod.get("score_after")), str(pkey), mod))

        # Stable sort: ties keep their order of appearance in the log.
        global_success.sort(key=lambda t: t[0], reverse=True)
        top_global = global_success[:5]
        summary_lines.append("\n### Successful Examples (Global Top-5):")
        if top_global:
            for score_after, pkey, mod in top_global:
                child = mod.get("child_round")
                summary_lines.append(
                    f"- (Parent {pkey} → Child {child}, Score: {score_after:.4f}{self._delta_suffix(mod)}) "
                    f"{mod.get('modification', '')}"
                )
        else:
            summary_lines.append("- (none yet)")

        return "\n".join(summary_lines)

    def select_parent_candidate(self, top_k: int) -> Optional[WorkflowCandidate]:
        """
        Selects a parent candidate using a softmax probability distribution over the
        top_k best-performing candidates, balancing exploration and exploitation.

        Candidates whose fitness is missing or not a finite number cannot be
        ranked and are ignored. Returns ``None`` when no rankable candidate exists.
        """
        scored: List[Tuple[float, WorkflowCandidate]] = []
        for candidate in self._load_all_candidates():
            try:
                value = float(candidate.fitness)
            except (TypeError, ValueError):
                continue
            if np.isfinite(value):
                scored.append((value, candidate))

        # Sort by fitness (higher is better) and take the top k.
        # Sorting on the real value keeps a fitness of 0.0 above negative scores.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        top = scored[:top_k]
        if not top:
            return None

        fitness_scores = np.array([value for value, _ in top], dtype=float)
        # Softmax probabilities: e^score / sum(e^scores). Subtracting the maximum
        # leaves the distribution unchanged but prevents overflow for large scores.
        weights = np.exp(fitness_scores - fitness_scores.max())
        probabilities = weights / weights.sum()

        # Draw an index rather than an element so NumPy never has to convert
        # the candidate objects into an array.
        chosen = int(np.random.choice(len(top), p=probabilities))
        return top[chosen][1]

    def record_score(
        self,
        round_num: int,
        fitness: float,
        code: str,
        *,
        score_type: str = "fitness",
        extra: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Saves the workflow code and appends its score to the log.

        The code is written to ``round_<round_num>_workflow.py`` in the
        candidates directory; one JSON line referencing that file is appended
        to the scores file. ``extra`` is stored under the ``"extra"`` key only
        when it is non-empty.
        """
        candidate_code_path = self.candidates_dir / f"round_{round_num}_workflow.py"
        with open(candidate_code_path, "w", encoding="utf-8") as f:
            f.write(code)

        payload: Dict[str, Any] = {
            "round": round_num,
            "fitness": fitness,
            "code_path": str(candidate_code_path),
            "score_type": str(score_type or "fitness"),
        }
        if extra:
            payload["extra"] = extra

        with open(self.scores_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(payload) + "\n")

    def record_experience(self, parent_round: int, child_round: int, modification: str, score_before: float, score_after: float):
        """
        Records the outcome of a modification attempt in the experience log.

        The outcome is filed under the parent's ``"success"`` list when
        ``score_after`` is strictly greater than ``score_before`` and under
        ``"failure"`` otherwise.

        Raises:
            json.JSONDecodeError: if the existing experience file is not valid JSON.
        """
        before = float(score_before)
        after = float(score_after)

        with open(self.experience_file, "r+", encoding="utf-8") as f:
            data = json.load(f)
            if not isinstance(data, dict):
                # The reader treats a non-object log as unusable, so start over.
                logger.warning("Experience log did not contain a JSON object; resetting it.")
                data = {}

            parent_key = str(parent_round)
            entry = data.get(parent_key)
            if not isinstance(entry, dict):
                entry = {}
                data[parent_key] = entry
            # Repair entries that lack one of the two outcome lists.
            for bucket in ("success", "failure"):
                if not isinstance(entry.get(bucket), list):
                    entry[bucket] = []

            outcome = {
                "child_round": child_round,
                "modification": modification,
                "score_before": before,
                "score_after": after,
                "delta": after - before,
                "recorded_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            }
            entry["success" if after > before else "failure"].append(outcome)

            # Rewrite the file in place: rewind, dump, then drop any leftover tail.
            f.seek(0)
            json.dump(data, f, indent=4)
            f.truncate()
@@ -0,0 +1,153 @@
1
+ # dsat/services/states/journal.py
2
+
3
+ """
4
+ Implements JournalState, which manages a tree of solution attempts (Nodes).
5
+ This is the core state representation for Paradigm 2 (AIDE/AutoMind-style) search agents.
6
+ """
7
+ import uuid
8
+ from functools import total_ordering
9
+ from typing import Optional, Any, List, Dict, Set
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+ from dsat.common.typing import ExecutionResult
14
+ from dsat.utils.context import truncate_output
15
+ from dsat.services.states.base import State
16
+
17
+
18
@total_ordering
class MetricValue(BaseModel):
    """
    Represents a comparable metric that can be configured for maximization or minimization.
    A value of None is considered worse than any numeric value.

    "Greater than" means "better than": with ``maximize=True`` the larger value
    wins, with ``maximize=False`` the smaller one does. The direction used for
    a comparison is the left operand's ``maximize`` flag. The remaining
    ordering operators are derived by ``functools.total_ordering``.
    """
    value: Optional[float]
    maximize: bool = True

    def __gt__(self, other: "MetricValue") -> bool:
        """Return True when this metric is strictly better than ``other``."""
        if not isinstance(other, MetricValue):
            # Let Python try the reflected operation / raise TypeError instead
            # of failing with an AttributeError on ``other.value``.
            return NotImplemented
        if self.value is None:
            return False
        if other.value is None:
            return True
        return (self.value > other.value) if self.maximize else (self.value < other.value)

    def __eq__(self, other: Any) -> bool:
        """Two metrics are equal when both are MetricValue instances with the same value."""
        return isinstance(other, MetricValue) and self.value == other.value

    def __str__(self) -> str:
        """Render as ``Metric↑(0.1234)`` / ``Metric↓(0.1234)``, or ``N/A`` when no value is set."""
        direction = "↑" if self.maximize else "↓"
        val_str = f"{self.value:.4f}" if self.value is not None else "N/A"
        return f"Metric{direction}({val_str})"
41
+
42
class Node(BaseModel):
    """
    Represents a single attempt or node in the solution search tree.
    Each node contains the code, plan, execution results, and review analysis.
    """
    # The attempt itself: both fields are required when constructing a node.
    code: str
    plan: str

    # Tree linkage. `id` defaults to a fresh random UUID hex string;
    # `parent_id` and `children_ids` are filled in by JournalState.append.
    id: str = Field(default_factory=lambda: uuid.uuid4().hex)
    parent_id: Optional[str] = None
    children_ids: Set[str] = Field(default_factory=set)

    # Execution Results
    term_out: str = ""
    exec_time: float = 0.0
    exc_type: Optional[str] = None
    exec_metadata: Dict[str, Any] = Field(default_factory=dict)

    # LLM Recordings
    task_context: Dict[str, Any] = Field(default_factory=dict)
    generate_prompt: Optional[str] = None
    llm_generate: Optional[Dict[str, Any]] = None
    review_context: Optional[Dict[str, Any]] = None
    llm_review: Optional[Dict[str, Any]] = None

    # Review Results
    analysis: str = ""
    metric: MetricValue = Field(default_factory=lambda: MetricValue(value=None))
    # A node counts as buggy until an execution result reports success.
    is_buggy: bool = True
    # -1 until JournalState.append assigns the journal length at insertion time.
    step: int = -1

    # Artifact paths
    code_artifact_path: Optional[str] = None
    final_submission_path: Optional[str] = None

    def absorb_exec_result(self, exec_result: ExecutionResult):
        """
        Updates the node with the results from a sandbox execution.

        Stores the combined (and truncated) stdout/stderr text, the exception
        type, the execution metadata, and marks the node as buggy when the
        execution did not succeed. `exec_time` is not touched here.
        """
        # Missing streams are treated as empty text.
        stdout = exec_result.stdout or ""
        stderr = exec_result.stderr or ""
        combined_output = f"STDOUT:\n{stdout}\n\nSTDERR:\n{stderr}".strip()
        self.term_out = truncate_output(combined_output)
        self.exc_type = exec_result.exc_type
        self.is_buggy = not exec_result.success
        self.exec_metadata = exec_result.metadata or {}

    class Config:
        """Pydantic configuration."""
        json_encoders = {set: list}  # Allow sets to be serialized to lists in JSON
90
+
91
class JournalState(State, BaseModel):
    """
    Holds the complete search tree (the "Journal") of solution nodes, keyed by node id.
    Offers helpers to insert nodes, look them up, pick the best working node,
    and render a short textual summary of the strongest attempts.
    """
    nodes: Dict[str, Node] = Field(default_factory=dict)

    def __len__(self) -> int:
        """Number of nodes currently stored in the journal."""
        return len(self.nodes)

    def append(self, node: Node, parent: Optional[Node] = None):
        """
        Insert ``node`` into the journal and, when a parent is given, link the two.

        Raises:
            ValueError: if ``parent`` is given but is not part of this journal.
        """
        if parent:
            registered_parent = self.nodes.get(parent.id)
            if registered_parent is None:
                raise ValueError(f"Parent node with id {parent.id} not in journal.")
            node.parent_id = parent.id
            registered_parent.children_ids.add(node.id)
        # The step is the journal size before this node is stored.
        node.step = len(self)
        self.nodes[node.id] = node

    def get_node(self, node_id: str) -> Optional[Node]:
        """Return the node stored under ``node_id``, or None when there is none."""
        return self.nodes.get(node_id, None)

    def get_best_node(self) -> Optional[Node]:
        """Return the non-buggy node with the best metric, or None when every node is buggy."""
        working = (candidate for candidate in self.nodes.values() if not candidate.is_buggy)
        return max(working, key=lambda candidate: candidate.metric, default=None)

    def generate_summary(self, max_nodes: int = 3) -> str:
        """
        Build a prompt-ready text summary of the ``max_nodes`` best successful attempts.

        Only non-buggy nodes that carry a metric value are considered. When more
        such nodes exist than are shown, a leading note states how many were omitted.
        """
        ranked = [
            candidate
            for candidate in self.nodes.values()
            if candidate.metric.value is not None and not candidate.is_buggy
        ]
        if not ranked:
            return "No successful solutions have been found yet."
        # Best first: MetricValue ordering already accounts for maximize/minimize.
        ranked.sort(key=lambda candidate: candidate.metric, reverse=True)

        # Windowing: keep only the leading `max_nodes` entries.
        shown = ranked[:max_nodes]
        blocks = [
            f"Attempt #{entry.step}:\n"
            f"Plan: {entry.plan}\n"
            f"Result Analysis: {entry.analysis}\n"
            f"Validation Metric: {entry.metric}\n"
            for entry in shown
        ]

        omitted = len(ranked) - len(shown)
        header = f"[... {omitted} other successful attempts exist ...]\n" if omitted > 0 else ""
        body = "\n------------------\n".join(blocks)
        return header + "Here is a summary of the best performing attempts:\n" + body
@@ -0,0 +1,290 @@
1
+ """
2
+ Persistent library for discovered operators.
3
+
4
+ This stores LLM-proposed Operator code + metadata across runs, and tracks basic
5
+ usage/success statistics to support simple operator-level selection.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import json
12
+ import math
13
+ from dataclasses import dataclass
14
+ from datetime import datetime, timezone
15
+ from pathlib import Path
16
+ from typing import Any, Dict, Iterable, List, Optional, Tuple
17
+
18
+
19
+ def _utc_now_iso() -> str:
20
+ return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
21
+
22
+
23
+ def _safe_int(value: Any, default: int = 0) -> int:
24
+ try:
25
+ return int(value)
26
+ except Exception:
27
+ return default
28
+
29
+
30
@dataclass(frozen=True)
class LibraryOperatorSpec:
    """Immutable view of one stored operator version, as built by OperatorLibrary lookups."""
    name: str  # operator name (its key in the library's "operators" mapping)
    version: int  # version number of this entry within the operator
    code: str  # operator source code
    description: str  # free-text description, whitespace-stripped
    inputs: str  # free-text description of inputs, whitespace-stripped
    outputs: str  # free-text description of outputs, whitespace-stripped
    triggers: str  # free-text trigger notes, whitespace-stripped
    task_types: List[str]  # task type labels stored with the version
    uses: int  # number of recorded outcomes for this version
    successes: int  # number of recorded successful outcomes
    failures: int  # number of recorded failed outcomes
43
+
44
+
45
class OperatorLibrary:
    """
    JSON-file-backed store of operator versions and their usage statistics.

    The in-memory layout is ``{"operators": {name: {"versions": [entry, ...]}}}``
    where each entry is a dict holding the code, descriptive metadata and the
    ``uses`` / ``successes`` / ``failures`` counters. Every mutating method
    writes the whole structure back to ``path``.
    """

    def __init__(self, path: Path):
        """Remember ``path`` and load any existing library content from it."""
        self.path = path
        self._data: Dict[str, Any] = {"operators": {}}
        self._load()

    def _load(self) -> None:
        """
        Read the library file into memory.

        A missing, unreadable or structurally invalid file results in an empty
        library rather than an error.
        """
        self._data = {"operators": {}}
        if not self.path.exists():
            return
        try:
            payload = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            return

        if isinstance(payload, dict) and isinstance(payload.get("operators"), dict):
            self._data = payload

    def _save(self) -> None:
        """Write the library to disk via a temporary sibling file that then replaces the target."""
        self.path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self.path.with_suffix(self.path.suffix + ".tmp")
        tmp.write_text(
            json.dumps(self._data, ensure_ascii=False, indent=2, sort_keys=True),
            encoding="utf-8",
        )
        tmp.replace(self.path)

    @staticmethod
    def _has_code(entry: Any) -> bool:
        """Return True when ``entry`` is a version dict carrying non-blank code."""
        return isinstance(entry, dict) and bool(str(entry.get("code") or "").strip())

    @staticmethod
    def _spec_from_entry(name: str, entry: Dict[str, Any]) -> LibraryOperatorSpec:
        """Build the immutable spec returned to callers from a stored version dict."""
        return LibraryOperatorSpec(
            name=str(name),
            version=_safe_int(entry.get("version"), 1),
            code=str(entry.get("code") or ""),
            description=str(entry.get("description") or "").strip(),
            inputs=str(entry.get("inputs") or "").strip(),
            outputs=str(entry.get("outputs") or "").strip(),
            triggers=str(entry.get("triggers") or "").strip(),
            task_types=list(entry.get("task_types") or []),
            uses=_safe_int(entry.get("uses"), 0),
            successes=_safe_int(entry.get("successes"), 0),
            failures=_safe_int(entry.get("failures"), 0),
        )

    def has(self, name: str) -> bool:
        """Return True when an operator record named ``name`` exists."""
        return name in self._data.get("operators", {})

    def get_best_version(self, name: str) -> Optional[LibraryOperatorSpec]:
        """
        Return the best available version for an operator name.

        Selection is based on (success_rate, uses, version) in descending order.
        Returns None if the operator is missing or has no usable versions.
        """
        operators = self._data.get("operators", {})
        if not isinstance(operators, dict):
            return None

        record = operators.get(name)
        if not isinstance(record, dict):
            return None
        versions = record.get("versions", [])
        if not isinstance(versions, list) or not versions:
            return None

        best: Optional[Dict[str, Any]] = None
        best_key: Optional[tuple[float, int, int]] = None

        for v in versions:
            if not self._has_code(v):
                continue
            uses = _safe_int(v.get("uses"), 0)
            successes = _safe_int(v.get("successes"), 0)
            version = _safe_int(v.get("version"), 1)
            success_rate = successes / uses if uses > 0 else 0.0
            key = (float(success_rate), uses, version)
            # Strict ">" keeps the earliest entry when keys tie.
            if best_key is None or key > best_key:
                best = v
                best_key = key

        if best is None:
            return None
        return self._spec_from_entry(name, best)

    def add_version(
        self,
        name: str,
        *,
        code: str,
        description: str,
        inputs: str = "",
        outputs: str = "",
        triggers: str = "",
        task_types: Optional[Iterable[str]] = None,
    ) -> int:
        """
        Store ``code`` as a new version of operator ``name`` and persist the library.

        If a version with the same code hash already exists, nothing is written
        and that version's number is returned. Otherwise the new version number
        (one more than the highest existing one) is returned.
        """
        operators = self._data.setdefault("operators", {})
        record = operators.get(name)
        if not isinstance(record, dict):
            # Missing or malformed record: start a clean one.
            record = {"versions": []}
            operators[name] = record
        versions = record.get("versions")
        if not isinstance(versions, list):
            versions = []
            record["versions"] = versions

        # Entries loaded from disk may be malformed; only dicts are inspected.
        known = [v for v in versions if isinstance(v, dict)]

        code_hash = hashlib.sha256(code.encode("utf-8")).hexdigest()[:16]
        for v in known:
            if v.get("code_hash") == code_hash:
                return _safe_int(v.get("version"), default=1)

        next_version = 1 + max((_safe_int(v.get("version"), 0) for v in known), default=0)
        versions.append(
            {
                "version": next_version,
                "code_hash": code_hash,
                "code": code,
                "description": (description or "").strip(),
                "inputs": (inputs or "").strip(),
                "outputs": (outputs or "").strip(),
                "triggers": (triggers or "").strip(),
                "task_types": [t for t in (task_types or []) if isinstance(t, str) and t.strip()],
                "created_at": _utc_now_iso(),
                "uses": 0,
                "successes": 0,
                "failures": 0,
                "seen_competition_ids": [],
            }
        )
        self._save()
        return next_version

    def record_outcome(
        self,
        name: str,
        version: int,
        *,
        success: bool,
        competition_ids: Optional[Iterable[str]] = None,
    ) -> None:
        """
        Update the counters of one operator version and persist the library.

        Increments ``uses`` plus either ``successes`` or ``failures``, stamps
        the usage time, and merges any non-empty string ``competition_ids`` into
        the version's ``seen_competition_ids``. Does nothing when the operator
        or the version is unknown. Only the first matching version is updated.
        """
        operators = self._data.get("operators", {})
        if not isinstance(operators, dict):
            return
        record = operators.get(name)
        if not isinstance(record, dict):
            return
        versions = record.get("versions", [])
        if not isinstance(versions, list):
            return

        for v in versions:
            if not isinstance(v, dict):
                continue
            if _safe_int(v.get("version"), -1) != int(version):
                continue
            v["uses"] = _safe_int(v.get("uses"), 0) + 1
            if success:
                v["successes"] = _safe_int(v.get("successes"), 0) + 1
                v["last_success_at"] = _utc_now_iso()
            else:
                v["failures"] = _safe_int(v.get("failures"), 0) + 1
            v["last_used_at"] = _utc_now_iso()

            if competition_ids:
                stored = v.get("seen_competition_ids")
                if not isinstance(stored, list):
                    stored = []
                seen = {c for c in stored if isinstance(c, str) and c}
                seen.update(cid for cid in competition_ids if isinstance(cid, str) and cid)
                v["seen_competition_ids"] = sorted(seen)

            self._save()
            return

    def select_for_prompt(
        self,
        max_ops: int,
        *,
        competition_ids: Optional[Iterable[str]] = None,
        task_types: Optional[Iterable[str]] = None,
    ) -> List[LibraryOperatorSpec]:
        """
        Select up to `max_ops` operators (best version per operator) using a simple UCB score.

        A version's score is its success rate plus an exploration term, plus
        0.25 when it has been recorded on one of the given ``competition_ids``
        and 0.15 when its task types overlap the given ``task_types``
        (compared case-insensitively). Versions without code are never returned.
        """
        operators = self._data.get("operators", {})
        if not isinstance(operators, dict) or max_ops <= 0:
            return []

        preferred_competitions = {
            cid for cid in (competition_ids or []) if isinstance(cid, str) and cid.strip()
        }
        preferred_task_types = {
            t.strip().lower() for t in (task_types or []) if isinstance(t, str) and t.strip()
        }

        all_versions: List[Tuple[str, Dict[str, Any]]] = []
        for name, record in operators.items():
            if not isinstance(record, dict):
                continue
            for v in record.get("versions", []) or []:
                # Same usability rule as get_best_version: a version with no code is useless in a prompt.
                if self._has_code(v):
                    all_versions.append((name, v))

        # Clamp to 1 so the logarithm below is taken of a value >= 2.
        total_uses = max(1, sum(_safe_int(v.get("uses"), 0) for _, v in all_versions))

        best_by_name: Dict[str, Tuple[float, Dict[str, Any]]] = {}
        for name, v in all_versions:
            uses = _safe_int(v.get("uses"), 0)
            successes = _safe_int(v.get("successes"), 0)
            mean = successes / max(1, uses)
            ucb = mean + math.sqrt(2.0 * math.log(total_uses + 1.0) / (uses + 1.0))

            bonus = 0.0
            if preferred_competitions:
                seen = {c for c in (v.get("seen_competition_ids") or []) if isinstance(c, str) and c}
                if seen & preferred_competitions:
                    bonus += 0.25
            if preferred_task_types:
                vtypes = {
                    t.strip().lower()
                    for t in (v.get("task_types") or [])
                    if isinstance(t, str) and t.strip()
                }
                if vtypes & preferred_task_types:
                    bonus += 0.15

            score = ucb + bonus
            prev = best_by_name.get(name)
            if prev is None or score > prev[0]:
                best_by_name[name] = (score, v)

        ranked = sorted(best_by_name.items(), key=lambda kv: kv[1][0], reverse=True)
        return [self._spec_from_entry(name, v) for name, (_score, v) in ranked[:max_ops]]