opik-optimizer 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +12 -0
- opik_optimizer/base_optimizer.py +33 -0
- opik_optimizer/hierarchical_reflective_optimizer/__init__.py +5 -0
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +718 -0
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py +355 -0
- opik_optimizer/hierarchical_reflective_optimizer/prompts.py +91 -0
- opik_optimizer/hierarchical_reflective_optimizer/reporting.py +679 -0
- opik_optimizer/hierarchical_reflective_optimizer/types.py +49 -0
- opik_optimizer/optimization_result.py +227 -6
- opik_optimizer/parameter_optimizer/__init__.py +11 -0
- opik_optimizer/parameter_optimizer/parameter_optimizer.py +382 -0
- opik_optimizer/parameter_optimizer/parameter_search_space.py +125 -0
- opik_optimizer/parameter_optimizer/parameter_spec.py +214 -0
- opik_optimizer/parameter_optimizer/search_space_types.py +24 -0
- opik_optimizer/parameter_optimizer/sensitivity_analysis.py +71 -0
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.1.dist-info}/METADATA +4 -2
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.1.dist-info}/RECORD +20 -8
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.1.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.1.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.0.1.dist-info → opik_optimizer-2.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,679 @@
|
|
1
|
+
from contextlib import contextmanager
|
2
|
+
from typing import Any
|
3
|
+
from collections.abc import Iterator
|
4
|
+
|
5
|
+
from rich.panel import Panel
|
6
|
+
from rich.text import Text
|
7
|
+
|
8
|
+
from ..optimization_config import chat_prompt
|
9
|
+
from ..reporting_utils import (
|
10
|
+
convert_tqdm_to_rich,
|
11
|
+
display_configuration, # noqa: F401
|
12
|
+
display_header, # noqa: F401
|
13
|
+
display_messages,
|
14
|
+
display_result, # noqa: F401
|
15
|
+
get_console,
|
16
|
+
suppress_opik_logs,
|
17
|
+
)
|
18
|
+
|
19
|
+
# Fixed character width used for every rich Panel rendered by this module,
# so all boxes line up in the optimizer's progress output.
PANEL_WIDTH = 90

# Shared console instance used by all display helpers below.
console = get_console()
|
21
|
+
|
22
|
+
|
23
|
+
def display_retry_attempt(
    attempt: int,
    max_attempts: int,
    failure_mode_name: str,
    verbose: int = 1,
) -> None:
    """Print a yellow notice that a failure mode is being retried.

    Args:
        attempt: Zero-based index of the retry about to run (shown 1-based).
        max_attempts: Total number of retry attempts allowed.
        failure_mode_name: Name of the failure mode being retried.
        verbose: Nothing is printed unless this is >= 1.
    """
    if verbose < 1:
        return
    notice = (
        f"│ Retry attempt {attempt + 1}/{max_attempts} "
        f"for failure mode '{failure_mode_name}' (no improvement observed)"
    )
    console.print(Text(notice, style="yellow"))
|
37
|
+
|
38
|
+
|
39
|
+
@contextmanager
def display_round_progress(max_rounds: int, verbose: int = 1) -> Any:
    """Context manager to display messages during an evaluation phase.

    Yields a ``Reporter`` whose callbacks the optimizer invokes to log
    round-level events (generation failure, round start, round end).
    All output is suppressed when ``verbose`` < 1.

    Args:
        max_rounds: Total number of optimization rounds, used in messages.
        verbose: Verbosity level; printing happens only when >= 1.
    """

    # Create a simple object with a method to set the score
    class Reporter:
        def failed_to_generate(self, num_prompts: int, error: str) -> None:
            # Red error line when candidate prompt generation failed.
            if verbose >= 1:
                console.print(
                    Text(
                        f"│ Failed to generate {num_prompts} candidate prompt{'' if num_prompts == 1 else 's'}: {error}",
                        style="red",
                    )
                )
                console.print(Text("│"))

        def round_start(self, round_number: int) -> None:
            # round_number is zero-based; displayed 1-based.
            if verbose >= 1:
                console.print(
                    Text(
                        f"│ - Starting optimization round {round_number + 1} of {max_rounds}"
                    )
                )

        def round_end(self, round_number: int, score: float, best_score: float) -> None:
            # Summarize the round outcome, comparing `score` with the best so far.
            if verbose >= 1:
                console.print(
                    Text(
                        f"│ Completed optimization round {round_number + 1} of {max_rounds}"
                    )
                )
                if best_score == 0 and score == 0:
                    # Nothing scored above zero yet.
                    console.print(
                        Text(
                            "│ No improvement in this optimization round - score is 0",
                            style="yellow",
                        )
                    )
                elif best_score == 0:
                    # First non-zero score; no percentage (would divide by zero).
                    console.print(
                        Text(
                            f"│ Found a new best performing prompt: {score:.4f}",
                            style="green",
                        )
                    )
                elif score > best_score:
                    perc_change = (score - best_score) / best_score
                    console.print(
                        Text(
                            f"│ Found a new best performing prompt: {score:.4f} ({perc_change:.2%})",
                            style="green",
                        )
                    )
                elif score <= best_score:
                    console.print(
                        Text(
                            "│ No improvement in this optimization round",
                            style="red",
                        )
                    )

                console.print(Text("│"))

    # Use our log suppression context manager and yield the reporter
    with suppress_opik_logs():
        with convert_tqdm_to_rich(verbose=verbose):
            try:
                yield Reporter()
            finally:
                pass
|
109
|
+
|
110
|
+
|
111
|
+
@contextmanager
def display_evaluation(
    message: str = "First we will establish the baseline performance:",
    verbose: int = 1,
    indent: str = "> ",
    baseline_score: float | None = None,
) -> Any:
    """Context manager to display messages during an evaluation phase.

    Yields a ``Reporter`` with a ``set_score`` callback. When
    ``baseline_score`` is None the reported score is treated as the baseline;
    otherwise it is compared against ``baseline_score`` and shown with a
    percentage delta.

    Args:
        message: Message to display
        verbose: Verbosity level
        indent: Prefix for the message (default "> " for top-level, "│ " for nested)
        baseline_score: If provided, shows score comparison instead of "Baseline score"
    """
    # Entry point
    if verbose >= 1:
        console.print(Text(f"{indent}{message}"))

    # Create a simple object with a method to set the score
    class Reporter:
        def set_score(self, s: float) -> None:
            if verbose >= 1:
                # Adjust score indentation based on indent style
                score_indent = " " if indent == "> " else "│ "

                if baseline_score is None:
                    # This is the baseline evaluation
                    console.print(
                        Text(
                            f"\r{score_indent}Baseline score was: {s:.4f}.",
                            style="green",
                        )
                    )
                    console.print(Text("│"))
                else:
                    # This is an improved prompt evaluation - show comparison
                    if s > baseline_score:
                        # Guard: percentage only meaningful for a positive baseline.
                        improvement_pct = (
                            ((s - baseline_score) / baseline_score * 100)
                            if baseline_score > 0
                            else 0
                        )
                        console.print(
                            Text(
                                f"\r{score_indent}Score for updated prompt: {s:.4f} (+{improvement_pct:.1f}%)",
                                style="green bold",
                            )
                        )
                    elif s < baseline_score:
                        decline_pct = (
                            ((baseline_score - s) / baseline_score * 100)
                            if baseline_score > 0
                            else 0
                        )
                        console.print(
                            Text(
                                f"\r{score_indent}Score for updated prompt: {s:.4f} (-{decline_pct:.1f}%)",
                                style="red",
                            )
                        )
                    else:
                        console.print(
                            Text(
                                f"\r{score_indent}Score for updated prompt: {s:.4f} (no change)",
                                style="yellow",
                            )
                        )
                    console.print(Text("│"))

    # Use our log suppression context manager and yield the reporter
    # Adjust progress bar indentation based on indent style
    progress_indent = " Evaluation" if indent == "> " else "│ Evaluation"
    with suppress_opik_logs():
        with convert_tqdm_to_rich(progress_indent, verbose=verbose):
            try:
                yield Reporter()
            finally:
                pass
|
190
|
+
|
191
|
+
|
192
|
+
def display_optimization_start_message(verbose: int = 1) -> None:
    """Announce the beginning of an optimization run (only when verbose >= 1)."""
    if verbose < 1:
        return
    for line in ("> Starting the optimization run", "│"):
        console.print(Text(line))
|
196
|
+
|
197
|
+
|
198
|
+
class CandidateGenerationReporter:
    """Reports the successful generation of candidate prompts."""

    def __init__(self, num_prompts: int):
        # How many candidate prompts were requested/generated.
        self.num_prompts = num_prompts

    def set_generated_prompts(self) -> None:
        """Print a dim confirmation line followed by a gutter spacer."""
        plural = "" if self.num_prompts == 1 else "s"
        summary = (
            f"│ Successfully generated {self.num_prompts} candidate prompt{plural}"
        )
        console.print(Text(summary, style="dim"))
        console.print(Text("│"))
|
210
|
+
|
211
|
+
|
212
|
+
def display_tool_description(description: str, label: str, color: str) -> None:
    """Render a tool description in a titled panel; blank text prints nothing.

    Args:
        description: Tool description text (stripped before display).
        label: Panel title.
        color: Panel border style/color.
    """
    body = description.strip()
    if not body:
        return
    console.print(Panel(body, title=label, border_style=color))
|
222
|
+
|
223
|
+
|
224
|
+
@contextmanager
def display_candidate_generation_report(
    num_prompts: int, verbose: int = 1
) -> Iterator[CandidateGenerationReporter]:
    """Yield a reporter for candidate generation, printing a header first.

    Args:
        num_prompts: Number of candidate prompts being generated.
        verbose: Header is printed only when >= 1.
    """
    if verbose >= 1:
        plural = "" if num_prompts == 1 else "s"
        console.print(Text(f"│ Generating candidate prompt{plural}:"))

    yield CandidateGenerationReporter(num_prompts)
|
237
|
+
|
238
|
+
|
239
|
+
@contextmanager
def display_prompt_candidate_scoring_report(verbose: int = 1) -> Any:
    """Context manager to display messages during an evaluation phase.

    Yields a ``Reporter`` with callbacks for announcing a candidate prompt
    under evaluation and for reporting its final score relative to the best
    score so far. All output is suppressed when ``verbose`` < 1.
    """

    # Create a simple object with a method to set the score
    class Reporter:
        def set_generated_prompts(
            self, candidate_count: int, prompt: chat_prompt.ChatPrompt
        ) -> None:
            # Announce which candidate is being evaluated, then show its messages.
            if verbose >= 1:
                console.print(
                    Text(f"│ Evaluating candidate prompt {candidate_count + 1}:")
                )
                display_messages(prompt.get_messages(), "│ ")

        def set_final_score(self, best_score: float, score: float) -> None:
            # Color the score line by comparison with the previous best:
            # green = better, red = worse, dim yellow = no meaningful change.
            if verbose >= 1:
                if best_score == 0 and score > 0:
                    # First non-zero score; no percentage (would divide by zero).
                    console.print(
                        Text(
                            f"│ Evaluation score: {score:.4f}",
                            style="green",
                        )
                    )
                elif best_score == 0 and score == 0:
                    console.print(
                        Text(
                            f"│ Evaluation score: {score:.4f}",
                            style="dim yellow",
                        )
                    )
                elif score > best_score:
                    perc_change = (score - best_score) / best_score
                    console.print(
                        Text(
                            f"│ Evaluation score: {score:.4f} ({perc_change:.2%})",
                            style="green",
                        )
                    )
                elif score < best_score:
                    # Negative percentage change, shown in red.
                    perc_change = (score - best_score) / best_score
                    console.print(
                        Text(
                            f"│ Evaluation score: {score:.4f} ({perc_change:.2%})",
                            style="red",
                        )
                    )
                else:
                    # score == best_score (tie).
                    console.print(
                        Text(
                            f"│ Evaluation score: {score:.4f}",
                            style="dim yellow",
                        )
                    )

                console.print(Text("│ "))
                console.print(Text("│ "))

    try:
        with suppress_opik_logs():
            with convert_tqdm_to_rich("│ Evaluation", verbose=verbose):
                yield Reporter()
    finally:
        pass
|
303
|
+
|
304
|
+
|
305
|
+
@contextmanager
def display_optimization_iteration(iteration: int, verbose: int = 1) -> Iterator[Any]:
    """Context manager to display progress for a single optimization iteration.

    Prints an iteration header on entry and yields a ``Reporter`` whose
    ``iteration_complete`` callback summarizes the iteration outcome.
    """
    if verbose >= 1:
        console.print()
        console.print(Text("│"))
        console.print(Text(f"│ Iteration {iteration}", style="bold cyan"))

    class Reporter:
        def iteration_complete(self, best_score: float, improved: bool) -> None:
            if verbose < 1:
                return
            if improved:
                summary = f"│ Iteration {iteration} complete - New best score: {best_score:.4f}"
                color = "green"
            else:
                summary = f"│ Iteration {iteration} complete - No improvement (best: {best_score:.4f})"
                color = "yellow"
            console.print(Text(summary, style=color))
            console.print(Text("│"))

    yield Reporter()
|
336
|
+
|
337
|
+
|
338
|
+
@contextmanager
def display_root_cause_analysis(verbose: int = 1) -> Iterator[Any]:
    """Context manager to display progress during root cause analysis with batch tracking."""
    if verbose >= 1:
        console.print(Text("│ "))
        console.print(
            Text("│ Analyzing root cause of failed evaluation items", style="cyan")
        )

    class Reporter:
        def set_completed(self, total_test_cases: int, num_batches: int) -> None:
            """Report how many test cases/batches were analyzed."""
            if verbose < 1:
                return
            summary = (
                f"│ Analyzed {total_test_cases} test cases across {num_batches} batches"
            )
            console.print(Text(summary, style="green"))
            console.print(Text("│ "))

    # Silence opik logging and route tqdm output through rich while analyzing.
    with suppress_opik_logs():
        with convert_tqdm_to_rich("│ Batch analysis", verbose=verbose):
            yield Reporter()
|
364
|
+
|
365
|
+
|
366
|
+
@contextmanager
def display_batch_synthesis(num_batches: int, verbose: int = 1) -> Iterator[Any]:
    """Context manager to display message during batch synthesis.

    Args:
        num_batches: Number of batches being synthesized (currently unused
            beyond the signature; header text is fixed).
        verbose: Header printed only when >= 1.
    """
    if verbose >= 1:
        console.print(Text("│ Synthesizing failure modes", style="cyan"))

    class Reporter:
        def set_completed(self, num_unified_modes: int) -> None:
            # No completion message needed - failure modes will be displayed next
            pass

    with suppress_opik_logs():
        yield Reporter()
|
379
|
+
|
380
|
+
|
381
|
+
def display_hierarchical_synthesis(
    total_test_cases: int, num_batches: int, synthesis_notes: str, verbose: int = 1
) -> None:
    """Display hierarchical analysis synthesis information in a box.

    Renders a cyan panel summarizing the analyzed test cases/batches plus the
    synthesis notes, replayed line by line behind the "│ " gutter prefix.
    """
    if verbose < 1:
        return

    body = Text()
    body.append(
        f"Analyzed {total_test_cases} test cases across {num_batches} batches\n\n",
        style="bold",
    )
    body.append("Synthesis Notes:\n", style="cyan")
    body.append(synthesis_notes)

    boxed = Panel(
        body,
        title="🔍 Hierarchical Root Cause Analysis",
        title_align="left",
        border_style="cyan",
        width=PANEL_WIDTH,
    )

    # Render the panel off-screen (keeping ANSI styles), then re-print each
    # rendered line with the gutter prefix.
    with console.capture() as capture:
        console.print(boxed)

    for rendered_line in capture.get().splitlines():
        console.print(Text("│ ") + Text.from_ansi(rendered_line))

    console.print()
|
413
|
+
|
414
|
+
|
415
|
+
def display_failure_modes(failure_modes: list[Any], verbose: int = 1) -> None:
    """Display identified failure modes in formatted panels.

    Prints a yellow header panel with the pattern count, then one panel per
    failure mode (first bordered red, the rest yellow), all behind the "│ "
    gutter prefix. Each failure mode object is expected to expose ``name``,
    ``description`` and ``root_cause`` attributes.
    """
    if verbose < 1:
        return

    def emit_prefixed(renderable: Any) -> None:
        # Render off-screen with ANSI styles, then replay behind the gutter.
        with console.capture() as capture:
            console.print(renderable)
        for rendered_line in capture.get().splitlines():
            console.print(Text("│ ") + Text.from_ansi(rendered_line))

    count = len(failure_modes)
    emit_prefixed(
        Panel(
            Text(
                f"Found {count} distinct failure pattern{'s' if count != 1 else ''}",
                style="bold yellow",
            ),
            title="⚠️ IDENTIFIED FAILURE MODES",
            title_align="left",
            border_style="yellow",
            width=PANEL_WIDTH,
        )
    )

    console.print()

    for idx, failure_mode in enumerate(failure_modes, 1):
        details = Text()
        details.append(f"{failure_mode.name}\n\n", style="bold white")
        details.append("Description:\n", style="cyan")
        details.append(f"{failure_mode.description}\n\n")
        details.append("Root Cause:\n", style="cyan")
        details.append(f"{failure_mode.root_cause}")

        emit_prefixed(
            Panel(
                details,
                title=f"Failure Mode {idx}",
                title_align="left",
                border_style="red" if idx == 1 else "yellow",
                width=PANEL_WIDTH,
            )
        )

        # Blank gutter line between consecutive failure-mode panels.
        if idx < count:
            console.print("│")
|
468
|
+
|
469
|
+
|
470
|
+
@contextmanager
def display_prompt_improvement(
    failure_mode_name: str, verbose: int = 1
) -> Iterator[Any]:
    """Context manager to display progress while generating improved prompt.

    Prints a header naming the failure mode being addressed and yields a
    ``Reporter`` whose ``set_reasoning`` callback renders the improvement
    strategy in a blue panel behind the "│ " gutter prefix.
    """
    if verbose >= 1:
        console.print()
        console.print(Text("│ "))
        console.print(Text(f"│ Addressing: {failure_mode_name}", style="bold cyan"))

    class Reporter:
        def set_reasoning(self, reasoning: str) -> None:
            # Display the LLM's improvement reasoning inside a titled panel.
            if verbose >= 1:
                reasoning_content = Text()
                reasoning_content.append("Improvement Strategy:\n", style="cyan")
                reasoning_content.append(reasoning)

                panel = Panel(
                    reasoning_content,
                    title="💡 Reasoning",
                    title_align="left",
                    border_style="blue",
                    # Narrower than the other panels to fit inside the gutter.
                    width=PANEL_WIDTH - 10,
                    padding=(0, 1),
                )

                # Capture and prefix each line
                with console.capture() as capture:
                    console.print(panel)

                rendered_panel = capture.get()
                for line in rendered_panel.splitlines():
                    console.print(Text("│ ") + Text.from_ansi(line))

                console.print(Text("│ "))

    try:
        with suppress_opik_logs():
            with convert_tqdm_to_rich(
                "│ Generating improved prompt", verbose=verbose
            ):
                yield Reporter()
    finally:
        pass
|
514
|
+
|
515
|
+
|
516
|
+
def display_improvement_reasoning(
    failure_mode_name: str, reasoning: str, verbose: int = 1
) -> None:
    """Display prompt improvement reasoning for a specific failure mode.

    Prints a header naming the failure mode, then the reasoning text inside a
    blue panel replayed behind the "│ " gutter prefix.
    """
    if verbose < 1:
        return

    console.print()
    console.print(Text("│ "))
    console.print(Text(f"│ Addressing: {failure_mode_name}", style="bold cyan"))

    strategy = Text()
    strategy.append("Improvement Strategy:\n", style="cyan")
    strategy.append(reasoning)

    boxed = Panel(
        strategy,
        title="💡 Reasoning",
        title_align="left",
        border_style="blue",
        width=PANEL_WIDTH - 10,
        padding=(0, 1),
    )

    # Render off-screen, then re-print each line with the gutter prefix.
    with console.capture() as capture:
        console.print(boxed)

    for rendered_line in capture.get().splitlines():
        console.print(Text("│ ") + Text.from_ansi(rendered_line))

    console.print(Text("│ "))
|
549
|
+
|
550
|
+
|
551
|
+
def display_iteration_improvement(
    improvement: float, current_score: float, best_score: float, verbose: int = 1
) -> None:
    """Display the improvement result for a failure mode iteration.

    Args:
        improvement: Fractional change achieved this iteration (e.g. 0.05 = +5%).
        current_score: Score obtained in this iteration.
        best_score: Best score prior to this iteration.
        verbose: Nothing is printed unless this is >= 1.
    """
    if verbose < 1:
        return

    if improvement > 0:
        line = f"│ ✓ Improvement: {improvement:.2%} (from {best_score:.4f} to {current_score:.4f})"
        color = "green bold"
    else:
        line = f"│ ✗ No improvement: {improvement:.2%} (score: {current_score:.4f}, best: {best_score:.4f})"
        color = "yellow"
    console.print(Text(line, style=color))
|
572
|
+
|
573
|
+
|
574
|
+
def display_optimized_prompt_diff(
    initial_messages: list[dict[str, str]],
    optimized_messages: list[dict[str, str]],
    initial_score: float,
    best_score: float,
    verbose: int = 1,
) -> None:
    """Display git-style diff of prompt changes.

    Args:
        initial_messages: Original chat messages (dicts with "role"/"content").
        optimized_messages: Optimized chat messages compared by position.
        initial_score: Score of the original prompt.
        best_score: Best score achieved by the optimized prompt.
        verbose: Nothing is printed unless this is >= 1.
    """
    import difflib

    if verbose < 1:
        return

    console.print()
    console.print(Text("│"))
    console.print(Text("│ > Optimization Results", style="bold green"))
    console.print(Text("│"))

    # Show score improvement
    if best_score > initial_score:
        # Guard against ZeroDivisionError when the baseline score is 0,
        # mirroring the `baseline_score > 0` guard in display_evaluation().
        perc_change = (
            (best_score - initial_score) / initial_score if initial_score > 0 else 0
        )
        console.print(
            Text(
                f"│ Prompt improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})",
                style="green",
            )
        )
    else:
        console.print(
            Text(f"│ No improvement found (score: {best_score:.4f})", style="yellow")
        )

    console.print(Text("│"))
    console.print(Text("│ Prompt Changes:", style="cyan"))
    console.print(Text("│"))

    # Compare each message positionally; extra messages on either side are
    # reported as wholly added/removed.
    for idx in range(max(len(initial_messages), len(optimized_messages))):
        initial_msg = initial_messages[idx] if idx < len(initial_messages) else None
        optimized_msg = (
            optimized_messages[idx] if idx < len(optimized_messages) else None
        )

        # Get role from whichever message exists
        role = "message"
        if initial_msg:
            role = initial_msg.get("role", "message")
        elif optimized_msg:
            role = optimized_msg.get("role", "message")

        initial_content = initial_msg.get("content", "") if initial_msg else ""
        optimized_content = optimized_msg.get("content", "") if optimized_msg else ""

        # Handle added messages
        if not initial_msg:
            console.print(Text(f"│ {role}: (added)", style="green bold"))
            for line in optimized_content.splitlines():
                console.print(Text(f"│ +{line}", style="green"))
            console.print(Text("│"))
            continue

        # Handle removed messages
        if not optimized_msg:
            console.print(Text(f"│ {role}: (removed)", style="red bold"))
            for line in initial_content.splitlines():
                console.print(Text(f"│ -{line}", style="red"))
            console.print(Text("│"))
            continue

        # Check if there are changes
        if initial_content == optimized_content:
            # No changes in this message
            console.print(Text(f"│ {role}: (unchanged)", style="dim"))
            continue

        # Generate unified diff
        diff_lines = list(
            difflib.unified_diff(
                initial_content.splitlines(keepends=False),
                optimized_content.splitlines(keepends=False),
                lineterm="",
                n=3,  # 3 lines of context
            )
        )

        if not diff_lines:
            continue

        # Display message header
        console.print(Text(f"│ {role}:", style="bold cyan"))

        # Create diff content, skipping the "---", "+++" and first "@@" header
        # lines; any later "@@" hunk headers are styled dim cyan below.
        diff_content = Text()
        for line in diff_lines[3:]:
            if line.startswith("+"):
                diff_content.append("│ " + line + "\n", style="green")
            elif line.startswith("-"):
                diff_content.append("│ " + line + "\n", style="red")
            elif line.startswith("@@"):
                diff_content.append("│ " + line + "\n", style="cyan dim")
            else:
                # Context line
                diff_content.append("│ " + line + "\n", style="dim")

        console.print(diff_content)
        console.print(Text("│"))
|