lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lm-deluge has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (108)
  1. lm_deluge/__init__.py +1 -2
  2. lm_deluge/api_requests/anthropic.py +117 -22
  3. lm_deluge/api_requests/base.py +84 -11
  4. lm_deluge/api_requests/bedrock.py +30 -6
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +166 -20
  7. lm_deluge/api_requests/openai.py +145 -25
  8. lm_deluge/batches.py +15 -45
  9. lm_deluge/client.py +309 -50
  10. lm_deluge/config.py +15 -3
  11. lm_deluge/models/__init__.py +14 -1
  12. lm_deluge/models/anthropic.py +29 -14
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +42 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +18 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +133 -7
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +50 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +705 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +537 -88
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/server/__init__.py +24 -0
  39. lm_deluge/server/__main__.py +144 -0
  40. lm_deluge/server/adapters.py +369 -0
  41. lm_deluge/server/app.py +388 -0
  42. lm_deluge/server/auth.py +71 -0
  43. lm_deluge/server/model_policy.py +215 -0
  44. lm_deluge/server/models_anthropic.py +172 -0
  45. lm_deluge/server/models_openai.py +175 -0
  46. lm_deluge/tool/__init__.py +1130 -0
  47. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  48. lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  49. lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  50. lm_deluge/tool/builtin/gemini.py +59 -0
  51. lm_deluge/tool/builtin/openai.py +74 -0
  52. lm_deluge/tool/cua/__init__.py +173 -0
  53. lm_deluge/tool/cua/actions.py +148 -0
  54. lm_deluge/tool/cua/base.py +27 -0
  55. lm_deluge/tool/cua/batch.py +215 -0
  56. lm_deluge/tool/cua/converters.py +466 -0
  57. lm_deluge/tool/cua/kernel.py +702 -0
  58. lm_deluge/tool/cua/trycua.py +989 -0
  59. lm_deluge/tool/prefab/__init__.py +45 -0
  60. lm_deluge/tool/prefab/batch_tool.py +156 -0
  61. lm_deluge/tool/prefab/docs.py +1119 -0
  62. lm_deluge/tool/prefab/email.py +294 -0
  63. lm_deluge/tool/prefab/filesystem.py +1711 -0
  64. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  65. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  66. lm_deluge/tool/prefab/memory.py +458 -0
  67. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  68. lm_deluge/tool/prefab/otc/executor.py +281 -0
  69. lm_deluge/tool/prefab/otc/parse.py +188 -0
  70. lm_deluge/tool/prefab/random.py +212 -0
  71. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  72. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  73. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  74. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  75. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  76. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  77. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  78. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  79. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  80. lm_deluge/tool/prefab/sheets.py +385 -0
  81. lm_deluge/tool/prefab/skills.py +0 -0
  82. lm_deluge/tool/prefab/subagents.py +233 -0
  83. lm_deluge/tool/prefab/todos.py +342 -0
  84. lm_deluge/tool/prefab/tool_search.py +169 -0
  85. lm_deluge/tool/prefab/web_search.py +199 -0
  86. lm_deluge/tracker.py +16 -13
  87. lm_deluge/util/schema.py +412 -0
  88. lm_deluge/warnings.py +8 -0
  89. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
  90. lm_deluge-0.0.90.dist-info/RECORD +132 -0
  91. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  92. lm_deluge/built_in_tools/openai.py +0 -28
  93. lm_deluge/presets/cerebras.py +0 -17
  94. lm_deluge/presets/meta.py +0 -13
  95. lm_deluge/tool.py +0 -849
  96. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  97. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  98. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  99. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  100. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  101. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  102. /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
  103. /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
  104. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  105. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  106. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  107. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  108. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,50 @@
1
# Model registry for Z.AI (Zhipu) GLM models, served through Z.AI's
# Anthropic-compatible endpoint.
#
# Every GLM entry shares the same endpoint, credential env var, capability
# flags, and per-token pricing, so the registry is generated from a single
# template instead of four hand-copied dicts. Adding a new GLM model is a
# one-line change to the id tuple below.

# Shared Anthropic-compatible API endpoint for all Z.AI models.
_ZAI_API_BASE = "https://api.z.ai/api/anthropic/v1"


def _zai_model(model_id: str) -> dict:
    """Build the registry entry for one Z.AI GLM model.

    Args:
        model_id: Provider-side model identifier (e.g. "glm-4.6"), used for
            both the ``id`` and ``name`` fields.

    Returns:
        A dict with the endpoint, auth env var, capability flags, and the
        pricing shared by all GLM models.
    """
    return {
        "id": model_id,
        "name": model_id,
        "api_base": _ZAI_API_BASE,
        "api_key_env_var": "ZAI_API_KEY",
        "supports_json": True,
        "api_spec": "anthropic",
        "input_cost": 0.6,
        "cached_input_cost": 0.6,
        "cache_write_cost": 0.6,
        "output_cost": 2.20,
    }


ZAI_MODELS = {
    model_id: _zai_model(model_id)
    for model_id in ("glm-4.7", "glm-4.6", "glm-4.5", "glm-4.5-air")
}
@@ -0,0 +1,95 @@
1
"""
GEPA (Genetic Pareto) prompt optimizer for lm-deluge.

This module provides an evolutionary optimizer for text components in AI systems.
It analyzes whole trajectories to propose improvements to prompts, tool descriptions,
and other text-based configuration.

Example usage:

    from lm_deluge import LLMClient
    from lm_deluge.prompt import Conversation, Message
    from lm_deluge.pipelines.gepa import Component, EvalResult, optimize

    # Define components to optimize
    components = {
        "system_prompt": Component(
            description="Instructions given to the model",
            value="You are a helpful assistant.",
        ),
    }

    # Define how to evaluate one example
    def evaluate(client: LLMClient, values: dict[str, str], example: dict) -> EvalResult:
        # Build prompt with current component values
        conv = Conversation.system(values["system_prompt"])
        conv = conv.add(Message.user(example["question"]))

        # Run inference
        response = client.process_prompts_sync([conv], show_progress=False)[0]
        answer = response.completion

        # Score the result
        correct = example["answer"].lower() in answer.lower()
        score = 1.0 if correct else 0.0

        # Build feedback for the proposer
        feedback = f"Score: {score}. Expected: {example['answer']}"

        # Return full trajectory
        full_conv = conv.add(Message.ai(answer))
        return EvalResult(conversation=full_conv, score=score, feedback=feedback)

    # Run optimization
    result = optimize(
        components=components,
        evaluate_fn=evaluate,
        dataset=train_examples,
        task_client=LLMClient("gpt-4o-mini"),
        proposer_client=LLMClient("gpt-4o"),
        max_iterations=50,
    )

    print(f"Best score: {result.best_score}")
    print(f"Best prompt: {result.best_candidate['system_prompt']}")
"""

from lm_deluge.pipelines.gepa.core import (
    Component,
    EvalResult,
    GEPAResult,
    GEPAState,
    Proposal,
)
from lm_deluge.pipelines.gepa.optimizer import GEPAEngine, optimize
from lm_deluge.pipelines.gepa.proposer import (
    DEFAULT_PROPOSAL_PROMPT,
    build_proposal_prompt,
    parse_proposal_response,
    propose_improvement_sync,
)
from lm_deluge.pipelines.gepa.util import (
    extract_text_from_response,
    format_components_for_prompt,
    format_conversation_compact,
)

# Public API of the GEPA subpackage.
__all__ = [
    # Core types
    "Component",
    "EvalResult",
    "Proposal",
    "GEPAState",
    "GEPAResult",
    # Main API
    "optimize",
    "GEPAEngine",
    # Proposer utilities
    "DEFAULT_PROPOSAL_PROMPT",
    "build_proposal_prompt",
    "parse_proposal_response",
    "propose_improvement_sync",
    # Formatting utilities
    "format_conversation_compact",
    "format_components_for_prompt",
    "extract_text_from_response",
]
@@ -0,0 +1,354 @@
1
+ """
2
+ Core types for GEPA optimization.
3
+
4
+ This module defines the fundamental data structures used throughout the optimizer.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import pickle
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from lm_deluge.prompt import Conversation
16
+
17
+
18
@dataclass
class Component:
    """
    A text component to optimize.

    Attributes:
        description: What this component does, shown to the proposer LLM
            (e.g., "System prompt given to the agent at conversation start")
        value: The current text value
    """

    # Human-readable purpose of this component; surfaced to the proposer LLM.
    description: str
    # The component's current text value.
    value: str
31
+
32
+
33
@dataclass
class EvalResult:
    """
    Result of evaluating one example.

    Attributes:
        conversation: The full trajectory (what actually happened)
        score: Numeric score, higher is better
        feedback: Explanation of the result (shown to proposer)
    """

    # Full trajectory of the rollout, including model responses.
    conversation: Conversation
    # Numeric quality score; higher is better.
    score: float
    # Textual explanation of the outcome, shown to the proposer LLM.
    feedback: str
47
+
48
+
49
@dataclass
class Proposal:
    """
    A proposed change to one component.

    Attributes:
        component_name: Which component to change
        new_value: The proposed new text
        reasoning: Why the proposer thinks this will help
    """

    # Name of the component this proposal targets.
    component_name: str
    # The proposed replacement text for that component.
    new_value: str
    # The proposer's rationale for the change.
    reasoning: str
63
+
64
+
65
+ @dataclass
66
+ class GEPAState:
67
+ """
68
+ Mutable optimization state.
69
+
70
+ Tracks all candidates, their scores, and the Pareto frontier.
71
+ """
72
+
73
+ # Component info (fixed after init)
74
+ component_names: list[str] = field(default_factory=list)
75
+ component_descriptions: dict[str, str] = field(default_factory=dict)
76
+
77
+ # Candidates: each is a dict mapping component_name -> text
78
+ candidates: list[dict[str, str]] = field(default_factory=list)
79
+ candidate_parents: list[int | None] = field(default_factory=list)
80
+
81
+ # Scores: candidate_scores[candidate_idx][example_idx] = score
82
+ candidate_scores: list[dict[int, float]] = field(default_factory=list)
83
+
84
+ # Pareto front tracking
85
+ # pareto_front[example_idx] = set of candidate indices achieving best score
86
+ pareto_front: dict[int, set[int]] = field(default_factory=dict)
87
+ # pareto_scores[example_idx] = best score achieved
88
+ pareto_scores: dict[int, float] = field(default_factory=dict)
89
+
90
+ # Counters
91
+ iteration: int = 0
92
+ total_evals: int = 0
93
+
94
+ @classmethod
95
+ def initialize(
96
+ cls,
97
+ components: dict[str, Component],
98
+ seed_scores: dict[int, float],
99
+ ) -> GEPAState:
100
+ """
101
+ Initialize state with seed candidate and its scores.
102
+
103
+ Args:
104
+ components: The components being optimized
105
+ seed_scores: Scores for seed candidate on each example (example_idx -> score)
106
+ """
107
+ state = cls()
108
+
109
+ # Store component info
110
+ state.component_names = list(components.keys())
111
+ state.component_descriptions = {
112
+ name: comp.description for name, comp in components.items()
113
+ }
114
+
115
+ # Add seed candidate
116
+ seed_values = {name: comp.value for name, comp in components.items()}
117
+ state.candidates = [seed_values]
118
+ state.candidate_parents = [None]
119
+ state.candidate_scores = [dict(seed_scores)]
120
+
121
+ # Initialize Pareto front with seed
122
+ state.pareto_front = {ex_idx: {0} for ex_idx in seed_scores}
123
+ state.pareto_scores = dict(seed_scores)
124
+
125
+ state.total_evals = len(seed_scores)
126
+
127
+ return state
128
+
129
+ def add_candidate(
130
+ self,
131
+ values: dict[str, str],
132
+ parent_idx: int | None,
133
+ scores: dict[int, float],
134
+ ) -> int:
135
+ """
136
+ Add a new candidate to the population.
137
+
138
+ Returns the index of the new candidate.
139
+ """
140
+ new_idx = len(self.candidates)
141
+
142
+ self.candidates.append(dict(values))
143
+ self.candidate_parents.append(parent_idx)
144
+ self.candidate_scores.append(dict(scores))
145
+
146
+ # Update Pareto front
147
+ for ex_idx, score in scores.items():
148
+ self._update_pareto(ex_idx, score, new_idx)
149
+
150
+ return new_idx
151
+
152
+ def _update_pareto(
153
+ self, example_idx: int, score: float, candidate_idx: int
154
+ ) -> None:
155
+ """Update Pareto front for one example."""
156
+ current_best = self.pareto_scores.get(example_idx, float("-inf"))
157
+
158
+ if score > current_best:
159
+ self.pareto_scores[example_idx] = score
160
+ self.pareto_front[example_idx] = {candidate_idx}
161
+ elif score == current_best:
162
+ if example_idx not in self.pareto_front:
163
+ self.pareto_front[example_idx] = set()
164
+ self.pareto_front[example_idx].add(candidate_idx)
165
+
166
+ def get_frontier_candidates(self) -> set[int]:
167
+ """Get all candidate indices that are on the Pareto front for any example."""
168
+ frontier: set[int] = set()
169
+ for candidates in self.pareto_front.values():
170
+ frontier.update(candidates)
171
+ return frontier
172
+
173
+ def best_candidate_idx(self) -> int:
174
+ """Get index of candidate with highest average score."""
175
+ if not self.candidates:
176
+ return 0
177
+
178
+ best_idx = 0
179
+ best_avg = float("-inf")
180
+
181
+ for idx, scores in enumerate(self.candidate_scores):
182
+ if scores:
183
+ avg = sum(scores.values()) / len(scores)
184
+ if avg > best_avg:
185
+ best_avg = avg
186
+ best_idx = idx
187
+
188
+ return best_idx
189
+
190
+ def get_candidate_avg_score(self, idx: int) -> float:
191
+ """Get average score for a candidate."""
192
+ scores = self.candidate_scores[idx]
193
+ if not scores:
194
+ return 0.0
195
+ return sum(scores.values()) / len(scores)
196
+
197
+ def get_improvable_examples(self, perfect_score: float = 1.0) -> list[int]:
198
+ """Get example indices where we haven't achieved perfect score."""
199
+ return [
200
+ ex_idx
201
+ for ex_idx, score in self.pareto_scores.items()
202
+ if score < perfect_score
203
+ ]
204
+
205
+ def save(self, run_dir: str | Path) -> None:
206
+ """Save state to disk."""
207
+ run_dir = Path(run_dir)
208
+ run_dir.mkdir(parents=True, exist_ok=True)
209
+
210
+ # Save full state as pickle
211
+ state_path = run_dir / "gepa_state.pkl"
212
+ with open(state_path, "wb") as f:
213
+ pickle.dump(self.__dict__, f)
214
+
215
+ # Save human-readable summary
216
+ summary = {
217
+ "num_candidates": len(self.candidates),
218
+ "iteration": self.iteration,
219
+ "total_evals": self.total_evals,
220
+ "best_idx": self.best_candidate_idx(),
221
+ "best_score": self.get_candidate_avg_score(self.best_candidate_idx()),
222
+ "components": self.component_names,
223
+ "pareto_size": len(self.get_frontier_candidates()),
224
+ }
225
+ summary_path = run_dir / "gepa_summary.json"
226
+ with open(summary_path, "w") as f:
227
+ json.dump(summary, f, indent=2)
228
+
229
+ @classmethod
230
+ def load(cls, run_dir: str | Path) -> GEPAState:
231
+ """Load state from disk."""
232
+ run_dir = Path(run_dir)
233
+ state_path = run_dir / "gepa_state.pkl"
234
+
235
+ with open(state_path, "rb") as f:
236
+ data = pickle.load(f)
237
+
238
+ state = cls()
239
+ state.__dict__.update(data)
240
+ return state
241
+
242
+
243
+ @dataclass(frozen=True)
244
+ class GEPAResult:
245
+ """
246
+ Immutable snapshot of optimization results.
247
+
248
+ Use this to inspect results after optimization completes.
249
+ """
250
+
251
+ candidates: tuple[dict[str, str], ...]
252
+ candidate_parents: tuple[int | None, ...]
253
+ candidate_avg_scores: tuple[float, ...]
254
+
255
+ best_idx: int
256
+ best_candidate: dict[str, str]
257
+ best_score: float
258
+
259
+ total_evals: int
260
+ iterations: int
261
+
262
+ component_names: tuple[str, ...]
263
+ component_descriptions: dict[str, str]
264
+
265
+ run_dir: str | None = None
266
+
267
+ @classmethod
268
+ def from_state(cls, state: GEPAState, run_dir: str | None = None) -> GEPAResult:
269
+ """Create an immutable result from mutable state."""
270
+ avg_scores = tuple(
271
+ state.get_candidate_avg_score(i) for i in range(len(state.candidates))
272
+ )
273
+ best_idx = state.best_candidate_idx()
274
+
275
+ return cls(
276
+ candidates=tuple(dict(c) for c in state.candidates),
277
+ candidate_parents=tuple(state.candidate_parents),
278
+ candidate_avg_scores=avg_scores,
279
+ best_idx=best_idx,
280
+ best_candidate=dict(state.candidates[best_idx]),
281
+ best_score=avg_scores[best_idx] if avg_scores else 0.0,
282
+ total_evals=state.total_evals,
283
+ iterations=state.iteration,
284
+ component_names=tuple(state.component_names),
285
+ component_descriptions=dict(state.component_descriptions),
286
+ run_dir=run_dir,
287
+ )
288
+
289
+ @property
290
+ def num_candidates(self) -> int:
291
+ return len(self.candidates)
292
+
293
+ def best_k(self, k: int = 5) -> list[tuple[int, dict[str, str], float]]:
294
+ """Get the top k candidates by average score."""
295
+ indexed = [
296
+ (i, self.candidates[i], self.candidate_avg_scores[i])
297
+ for i in range(len(self.candidates))
298
+ ]
299
+ indexed.sort(key=lambda x: x[2], reverse=True)
300
+ return indexed[:k]
301
+
302
+ def lineage(self, idx: int) -> list[int]:
303
+ """Get the ancestry chain for a candidate (oldest first)."""
304
+ chain = [idx]
305
+ while True:
306
+ parent = self.candidate_parents[chain[-1]]
307
+ if parent is None:
308
+ break
309
+ chain.append(parent)
310
+ return list(reversed(chain))
311
+
312
+ def diff(
313
+ self, parent_idx: int, child_idx: int, only_changed: bool = True
314
+ ) -> dict[str, tuple[str, str]]:
315
+ """
316
+ Show differences between two candidates.
317
+
318
+ Returns dict mapping component_name -> (old_value, new_value).
319
+ """
320
+ parent = self.candidates[parent_idx]
321
+ child = self.candidates[child_idx]
322
+
323
+ result = {}
324
+ all_keys = set(parent.keys()) | set(child.keys())
325
+
326
+ for key in all_keys:
327
+ old = parent.get(key, "")
328
+ new = child.get(key, "")
329
+ if not only_changed or old != new:
330
+ result[key] = (old, new)
331
+
332
+ return result
333
+
334
+ def to_dict(self) -> dict[str, Any]:
335
+ """Convert to JSON-serializable dict."""
336
+ return {
337
+ "candidates": list(self.candidates),
338
+ "candidate_parents": list(self.candidate_parents),
339
+ "candidate_avg_scores": list(self.candidate_avg_scores),
340
+ "best_idx": self.best_idx,
341
+ "best_candidate": self.best_candidate,
342
+ "best_score": self.best_score,
343
+ "total_evals": self.total_evals,
344
+ "iterations": self.iterations,
345
+ "component_names": list(self.component_names),
346
+ "run_dir": self.run_dir,
347
+ }
348
+
349
+ def __repr__(self) -> str:
350
+ return (
351
+ f"GEPAResult(candidates={self.num_candidates}, "
352
+ f"best_score={self.best_score:.4f}, "
353
+ f"total_evals={self.total_evals})"
354
+ )