opik-optimizer 2.1.2__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +2 -2
- opik_optimizer/base_optimizer.py +314 -145
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
- opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
- opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
- opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
- opik_optimizer/gepa_optimizer/reporting.py +164 -22
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +221 -245
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py +38 -14
- opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
- opik_optimizer/hierarchical_reflective_optimizer/reporting.py +287 -132
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
- opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
- opik_optimizer/mipro_optimizer/__init__.py +2 -2
- opik_optimizer/mipro_optimizer/_lm.py +4 -4
- opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
- opik_optimizer/mipro_optimizer/utils.py +1 -0
- opik_optimizer/multi_metric_objective.py +33 -0
- opik_optimizer/optimizable_agent.py +7 -4
- opik_optimizer/optimization_config/chat_prompt.py +7 -10
- opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
- opik_optimizer/parameter_optimizer/reporting.py +148 -0
- opik_optimizer/reporting_utils.py +42 -15
- opik_optimizer/task_evaluator.py +26 -9
- opik_optimizer/utils/core.py +16 -2
- opik_optimizer/utils/prompt_segments.py +1 -2
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +37 -37
- opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
from contextlib import contextmanager
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any, Literal
|
|
3
3
|
from collections.abc import Iterator
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
|
|
5
6
|
from rich.panel import Panel
|
|
6
7
|
from rich.text import Text
|
|
7
8
|
|
|
8
9
|
from ..optimization_config import chat_prompt
|
|
9
|
-
from ..reporting_utils import (
|
|
10
|
+
from ..reporting_utils import ( # noqa: F401
|
|
10
11
|
convert_tqdm_to_rich,
|
|
11
|
-
display_configuration,
|
|
12
|
-
display_header,
|
|
12
|
+
display_configuration,
|
|
13
|
+
display_header,
|
|
13
14
|
display_messages,
|
|
14
|
-
display_result,
|
|
15
|
+
display_result,
|
|
15
16
|
get_console,
|
|
16
17
|
suppress_opik_logs,
|
|
17
18
|
)
|
|
@@ -20,6 +21,97 @@ PANEL_WIDTH = 90
|
|
|
20
21
|
console = get_console()
|
|
21
22
|
|
|
22
23
|
|
|
24
|
+
@dataclass
|
|
25
|
+
class MessageDiffItem:
|
|
26
|
+
"""Represents a single message's diff information."""
|
|
27
|
+
|
|
28
|
+
role: str
|
|
29
|
+
change_type: Literal["added", "removed", "unchanged", "changed"]
|
|
30
|
+
initial_content: str | None
|
|
31
|
+
optimized_content: str | None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def compute_message_diff_order(
|
|
35
|
+
initial_messages: list[dict[str, str]],
|
|
36
|
+
optimized_messages: list[dict[str, str]],
|
|
37
|
+
) -> list[MessageDiffItem]:
|
|
38
|
+
"""
|
|
39
|
+
Compute the diff between initial and optimized messages, returning them in optimized message order.
|
|
40
|
+
|
|
41
|
+
This function groups messages by role and compares them to determine what changed.
|
|
42
|
+
The returned list maintains the order of roles as they appear in the optimized messages.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
initial_messages: List of initial message dictionaries with 'role' and 'content' keys
|
|
46
|
+
optimized_messages: List of optimized message dictionaries with 'role' and 'content' keys
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
List of MessageDiffItem objects in the order roles appear in optimized_messages,
|
|
50
|
+
followed by any removed roles that only existed in initial_messages.
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def group_by_role(
|
|
54
|
+
messages: list[dict[str, str]],
|
|
55
|
+
) -> dict[str, list[tuple[int, str]]]:
|
|
56
|
+
"""Group messages by role, storing (index, content) tuples."""
|
|
57
|
+
groups: dict[str, list[tuple[int, str]]] = {}
|
|
58
|
+
for idx, msg in enumerate(messages):
|
|
59
|
+
role = msg.get("role", "message")
|
|
60
|
+
content = msg.get("content", "")
|
|
61
|
+
if role not in groups:
|
|
62
|
+
groups[role] = []
|
|
63
|
+
groups[role].append((idx, content))
|
|
64
|
+
return groups
|
|
65
|
+
|
|
66
|
+
initial_by_role = group_by_role(initial_messages)
|
|
67
|
+
optimized_by_role = group_by_role(optimized_messages)
|
|
68
|
+
|
|
69
|
+
# Get all unique roles maintaining order from optimized messages
|
|
70
|
+
all_roles = []
|
|
71
|
+
seen_roles = set()
|
|
72
|
+
for msg in optimized_messages:
|
|
73
|
+
role = msg.get("role", "message")
|
|
74
|
+
if role not in seen_roles:
|
|
75
|
+
all_roles.append(role)
|
|
76
|
+
seen_roles.add(role)
|
|
77
|
+
# Add any roles that were in initial but not in optimized (removed roles)
|
|
78
|
+
for msg in initial_messages:
|
|
79
|
+
role = msg.get("role", "message")
|
|
80
|
+
if role not in seen_roles:
|
|
81
|
+
all_roles.append(role)
|
|
82
|
+
seen_roles.add(role)
|
|
83
|
+
|
|
84
|
+
# Build diff items for each role
|
|
85
|
+
diff_items: list[MessageDiffItem] = []
|
|
86
|
+
for role in all_roles:
|
|
87
|
+
initial_content = (
|
|
88
|
+
initial_by_role[role][0][1] if role in initial_by_role else None
|
|
89
|
+
)
|
|
90
|
+
optimized_content = (
|
|
91
|
+
optimized_by_role[role][0][1] if role in optimized_by_role else None
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
if initial_content is None and optimized_content is not None:
|
|
95
|
+
change_type: Literal["added", "removed", "unchanged", "changed"] = "added"
|
|
96
|
+
elif initial_content is not None and optimized_content is None:
|
|
97
|
+
change_type = "removed"
|
|
98
|
+
elif initial_content == optimized_content:
|
|
99
|
+
change_type = "unchanged"
|
|
100
|
+
else:
|
|
101
|
+
change_type = "changed"
|
|
102
|
+
|
|
103
|
+
diff_items.append(
|
|
104
|
+
MessageDiffItem(
|
|
105
|
+
role=role,
|
|
106
|
+
change_type=change_type,
|
|
107
|
+
initial_content=initial_content,
|
|
108
|
+
optimized_content=optimized_content,
|
|
109
|
+
)
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
return diff_items
|
|
113
|
+
|
|
114
|
+
|
|
23
115
|
def display_retry_attempt(
|
|
24
116
|
attempt: int,
|
|
25
117
|
max_attempts: int,
|
|
@@ -29,9 +121,11 @@ def display_retry_attempt(
|
|
|
29
121
|
"""Display retry attempt information."""
|
|
30
122
|
if verbose >= 1:
|
|
31
123
|
console.print(
|
|
32
|
-
Text(
|
|
33
|
-
|
|
34
|
-
|
|
124
|
+
Text("│ ").append(
|
|
125
|
+
Text(
|
|
126
|
+
f"Retry attempt {attempt + 1}/{max_attempts} for failure mode '{failure_mode_name}' (no improvement observed)",
|
|
127
|
+
style="yellow",
|
|
128
|
+
)
|
|
35
129
|
)
|
|
36
130
|
)
|
|
37
131
|
|
|
@@ -132,14 +226,16 @@ def display_evaluation(
|
|
|
132
226
|
def set_score(self, s: float) -> None:
|
|
133
227
|
if verbose >= 1:
|
|
134
228
|
# Adjust score indentation based on indent style
|
|
135
|
-
score_indent = "
|
|
229
|
+
score_indent = "│ " if indent == "> " else "│ "
|
|
136
230
|
|
|
137
231
|
if baseline_score is None:
|
|
138
232
|
# This is the baseline evaluation
|
|
139
233
|
console.print(
|
|
140
|
-
Text(
|
|
141
|
-
|
|
142
|
-
|
|
234
|
+
Text(score_indent).append(
|
|
235
|
+
Text(
|
|
236
|
+
f"Baseline score was: {s:.4f}.",
|
|
237
|
+
style="green",
|
|
238
|
+
)
|
|
143
239
|
)
|
|
144
240
|
)
|
|
145
241
|
console.print(Text("│"))
|
|
@@ -152,9 +248,11 @@ def display_evaluation(
|
|
|
152
248
|
else 0
|
|
153
249
|
)
|
|
154
250
|
console.print(
|
|
155
|
-
Text(
|
|
156
|
-
|
|
157
|
-
|
|
251
|
+
Text(score_indent).append(
|
|
252
|
+
Text(
|
|
253
|
+
f"Score for updated prompt: {s:.4f} (+{improvement_pct:.1f}%)",
|
|
254
|
+
style="green bold",
|
|
255
|
+
)
|
|
158
256
|
)
|
|
159
257
|
)
|
|
160
258
|
elif s < baseline_score:
|
|
@@ -164,23 +262,27 @@ def display_evaluation(
|
|
|
164
262
|
else 0
|
|
165
263
|
)
|
|
166
264
|
console.print(
|
|
167
|
-
Text(
|
|
168
|
-
|
|
169
|
-
|
|
265
|
+
Text(score_indent).append(
|
|
266
|
+
Text(
|
|
267
|
+
f"Score for updated prompt: {s:.4f} (-{decline_pct:.1f}%)",
|
|
268
|
+
style="red",
|
|
269
|
+
)
|
|
170
270
|
)
|
|
171
271
|
)
|
|
172
272
|
else:
|
|
173
273
|
console.print(
|
|
174
|
-
Text(
|
|
175
|
-
|
|
176
|
-
|
|
274
|
+
Text(score_indent).append(
|
|
275
|
+
Text(
|
|
276
|
+
f"Score for updated prompt: {s:.4f} (no change)",
|
|
277
|
+
style="yellow",
|
|
278
|
+
)
|
|
177
279
|
)
|
|
178
280
|
)
|
|
179
281
|
console.print(Text("│"))
|
|
180
282
|
|
|
181
283
|
# Use our log suppression context manager and yield the reporter
|
|
182
284
|
# Adjust progress bar indentation based on indent style
|
|
183
|
-
progress_indent = "
|
|
285
|
+
progress_indent = "│ Evaluation" if indent == "> " else "│ Evaluation"
|
|
184
286
|
with suppress_opik_logs():
|
|
185
287
|
with convert_tqdm_to_rich(progress_indent, verbose=verbose):
|
|
186
288
|
try:
|
|
@@ -306,25 +408,31 @@ def display_prompt_candidate_scoring_report(verbose: int = 1) -> Any:
|
|
|
306
408
|
def display_optimization_iteration(iteration: int, verbose: int = 1) -> Iterator[Any]:
|
|
307
409
|
"""Context manager to display progress for a single optimization iteration."""
|
|
308
410
|
if verbose >= 1:
|
|
309
|
-
console.print()
|
|
310
411
|
console.print(Text("│"))
|
|
311
|
-
console.print(Text(
|
|
412
|
+
console.print(Text("│"))
|
|
413
|
+
console.print(
|
|
414
|
+
Text("│ ").append(Text(f"Iteration {iteration}", style="bold cyan"))
|
|
415
|
+
)
|
|
312
416
|
|
|
313
417
|
class Reporter:
|
|
314
418
|
def iteration_complete(self, best_score: float, improved: bool) -> None:
|
|
315
419
|
if verbose >= 1:
|
|
316
420
|
if improved:
|
|
317
421
|
console.print(
|
|
318
|
-
Text(
|
|
319
|
-
|
|
320
|
-
|
|
422
|
+
Text("│ ").append(
|
|
423
|
+
Text(
|
|
424
|
+
f"Iteration {iteration} complete - New best score: {best_score:.4f}",
|
|
425
|
+
style="green",
|
|
426
|
+
)
|
|
321
427
|
)
|
|
322
428
|
)
|
|
323
429
|
else:
|
|
324
430
|
console.print(
|
|
325
|
-
Text(
|
|
326
|
-
|
|
327
|
-
|
|
431
|
+
Text("│ ").append(
|
|
432
|
+
Text(
|
|
433
|
+
f"Iteration {iteration} complete - No improvement (best: {best_score:.4f})",
|
|
434
|
+
style="yellow",
|
|
435
|
+
)
|
|
328
436
|
)
|
|
329
437
|
)
|
|
330
438
|
console.print(Text("│"))
|
|
@@ -341,16 +449,20 @@ def display_root_cause_analysis(verbose: int = 1) -> Iterator[Any]:
|
|
|
341
449
|
if verbose >= 1:
|
|
342
450
|
console.print(Text("│ "))
|
|
343
451
|
console.print(
|
|
344
|
-
Text("│
|
|
452
|
+
Text("│ ").append(
|
|
453
|
+
Text("Analyzing root cause of failed evaluation items", style="cyan")
|
|
454
|
+
)
|
|
345
455
|
)
|
|
346
456
|
|
|
347
457
|
class Reporter:
|
|
348
458
|
def set_completed(self, total_test_cases: int, num_batches: int) -> None:
|
|
349
459
|
if verbose >= 1:
|
|
350
460
|
console.print(
|
|
351
|
-
Text(
|
|
352
|
-
|
|
353
|
-
|
|
461
|
+
Text("│ ").append(
|
|
462
|
+
Text(
|
|
463
|
+
f"Analyzed {total_test_cases} test cases across {num_batches} batches",
|
|
464
|
+
style="green",
|
|
465
|
+
)
|
|
354
466
|
)
|
|
355
467
|
)
|
|
356
468
|
console.print(Text("│ "))
|
|
@@ -367,7 +479,9 @@ def display_root_cause_analysis(verbose: int = 1) -> Iterator[Any]:
|
|
|
367
479
|
def display_batch_synthesis(num_batches: int, verbose: int = 1) -> Iterator[Any]:
|
|
368
480
|
"""Context manager to display message during batch synthesis."""
|
|
369
481
|
if verbose >= 1:
|
|
370
|
-
console.print(
|
|
482
|
+
console.print(
|
|
483
|
+
Text("│ ").append(Text("Synthesizing failure modes", style="cyan"))
|
|
484
|
+
)
|
|
371
485
|
|
|
372
486
|
class Reporter:
|
|
373
487
|
def set_completed(self, num_unified_modes: int) -> None:
|
|
@@ -406,10 +520,13 @@ def display_hierarchical_synthesis(
|
|
|
406
520
|
console.print(panel)
|
|
407
521
|
|
|
408
522
|
rendered_panel = capture.get()
|
|
409
|
-
for line in rendered_panel.splitlines():
|
|
410
|
-
console.print(Text("│ ") + Text.from_ansi(line))
|
|
411
523
|
|
|
412
|
-
|
|
524
|
+
# Prefix each line with '│ ', preserving ANSI styles
|
|
525
|
+
prefixed_output = "\n".join(f"│ {line}" for line in rendered_panel.splitlines())
|
|
526
|
+
|
|
527
|
+
# Print the prefixed output (will include colors)
|
|
528
|
+
console.print(prefixed_output, highlight=False)
|
|
529
|
+
console.print(Text("│"))
|
|
413
530
|
|
|
414
531
|
|
|
415
532
|
def display_failure_modes(failure_modes: list[Any], verbose: int = 1) -> None:
|
|
@@ -433,10 +550,13 @@ def display_failure_modes(failure_modes: list[Any], verbose: int = 1) -> None:
|
|
|
433
550
|
console.print(header_panel)
|
|
434
551
|
|
|
435
552
|
rendered_header = capture.get()
|
|
436
|
-
for line in rendered_header.splitlines():
|
|
437
|
-
console.print(Text("│ ") + Text.from_ansi(line))
|
|
438
553
|
|
|
439
|
-
|
|
554
|
+
# Prefix each line with '│ ', preserving ANSI styles
|
|
555
|
+
prefixed_output = "\n".join(f"│ {line}" for line in rendered_header.splitlines())
|
|
556
|
+
|
|
557
|
+
# Print the prefixed output (will include colors)
|
|
558
|
+
console.print(prefixed_output, highlight=False)
|
|
559
|
+
console.print(Text("│"))
|
|
440
560
|
|
|
441
561
|
for idx, failure_mode in enumerate(failure_modes, 1):
|
|
442
562
|
# Create content for this failure mode
|
|
@@ -460,8 +580,14 @@ def display_failure_modes(failure_modes: list[Any], verbose: int = 1) -> None:
|
|
|
460
580
|
console.print(panel)
|
|
461
581
|
|
|
462
582
|
rendered_panel = capture.get()
|
|
463
|
-
|
|
464
|
-
|
|
583
|
+
|
|
584
|
+
# Prefix each line with '│ ', preserving ANSI styles
|
|
585
|
+
prefixed_output = "\n".join(
|
|
586
|
+
f"│ {line}" for line in rendered_panel.splitlines()
|
|
587
|
+
)
|
|
588
|
+
|
|
589
|
+
# Print the prefixed output (will include colors)
|
|
590
|
+
console.print(prefixed_output, highlight=False)
|
|
465
591
|
|
|
466
592
|
if idx < len(failure_modes):
|
|
467
593
|
console.print("│")
|
|
@@ -473,9 +599,13 @@ def display_prompt_improvement(
|
|
|
473
599
|
) -> Iterator[Any]:
|
|
474
600
|
"""Context manager to display progress while generating improved prompt."""
|
|
475
601
|
if verbose >= 1:
|
|
476
|
-
console.print()
|
|
602
|
+
console.print(Text("│"))
|
|
477
603
|
console.print(Text("│ "))
|
|
478
|
-
console.print(
|
|
604
|
+
console.print(
|
|
605
|
+
Text("│ ").append(
|
|
606
|
+
Text(f"Addressing: {failure_mode_name}", style="bold cyan")
|
|
607
|
+
)
|
|
608
|
+
)
|
|
479
609
|
|
|
480
610
|
class Reporter:
|
|
481
611
|
def set_reasoning(self, reasoning: str) -> None:
|
|
@@ -498,9 +628,14 @@ def display_prompt_improvement(
|
|
|
498
628
|
console.print(panel)
|
|
499
629
|
|
|
500
630
|
rendered_panel = capture.get()
|
|
501
|
-
for line in rendered_panel.splitlines():
|
|
502
|
-
console.print(Text("│ ") + Text.from_ansi(line))
|
|
503
631
|
|
|
632
|
+
# Prefix each line with '│ ', preserving ANSI styles
|
|
633
|
+
prefixed_output = "\n".join(
|
|
634
|
+
f"│ {line}" for line in rendered_panel.splitlines()
|
|
635
|
+
)
|
|
636
|
+
|
|
637
|
+
# Print the prefixed output (will include colors)
|
|
638
|
+
console.print(prefixed_output, highlight=False)
|
|
504
639
|
console.print(Text("│ "))
|
|
505
640
|
|
|
506
641
|
try:
|
|
@@ -520,9 +655,11 @@ def display_improvement_reasoning(
|
|
|
520
655
|
if verbose < 1:
|
|
521
656
|
return
|
|
522
657
|
|
|
523
|
-
console.print()
|
|
658
|
+
console.print(Text("│"))
|
|
524
659
|
console.print(Text("│ "))
|
|
525
|
-
console.print(
|
|
660
|
+
console.print(
|
|
661
|
+
Text("│ ").append(Text(f"Addressing: {failure_mode_name}", style="bold cyan"))
|
|
662
|
+
)
|
|
526
663
|
|
|
527
664
|
reasoning_content = Text()
|
|
528
665
|
reasoning_content.append("Improvement Strategy:\n", style="cyan")
|
|
@@ -542,9 +679,12 @@ def display_improvement_reasoning(
|
|
|
542
679
|
console.print(panel)
|
|
543
680
|
|
|
544
681
|
rendered_panel = capture.get()
|
|
545
|
-
for line in rendered_panel.splitlines():
|
|
546
|
-
console.print(Text("│ ") + Text.from_ansi(line))
|
|
547
682
|
|
|
683
|
+
# Prefix each line with '│ ', preserving ANSI styles
|
|
684
|
+
prefixed_output = "\n".join(f"│ {line}" for line in rendered_panel.splitlines())
|
|
685
|
+
|
|
686
|
+
# Print the prefixed output (will include colors)
|
|
687
|
+
console.print(prefixed_output, highlight=False)
|
|
548
688
|
console.print(Text("│ "))
|
|
549
689
|
|
|
550
690
|
|
|
@@ -557,16 +697,20 @@ def display_iteration_improvement(
|
|
|
557
697
|
|
|
558
698
|
if improvement > 0:
|
|
559
699
|
console.print(
|
|
560
|
-
Text(
|
|
561
|
-
|
|
562
|
-
|
|
700
|
+
Text("│ ").append(
|
|
701
|
+
Text(
|
|
702
|
+
f"✓ Improvement: {improvement:.2%} (from {best_score:.4f} to {current_score:.4f})",
|
|
703
|
+
style="green bold",
|
|
704
|
+
)
|
|
563
705
|
)
|
|
564
706
|
)
|
|
565
707
|
else:
|
|
566
708
|
console.print(
|
|
567
|
-
Text(
|
|
568
|
-
|
|
569
|
-
|
|
709
|
+
Text("│ ").append(
|
|
710
|
+
Text(
|
|
711
|
+
f"✗ No improvement: {improvement:.2%} (score: {current_score:.4f}, best: {best_score:.4f})",
|
|
712
|
+
style="yellow",
|
|
713
|
+
)
|
|
570
714
|
)
|
|
571
715
|
)
|
|
572
716
|
|
|
@@ -584,96 +728,107 @@ def display_optimized_prompt_diff(
|
|
|
584
728
|
if verbose < 1:
|
|
585
729
|
return
|
|
586
730
|
|
|
587
|
-
console.print()
|
|
588
731
|
console.print(Text("│"))
|
|
589
|
-
console.print(Text("│
|
|
732
|
+
console.print(Text("│"))
|
|
733
|
+
console.print(Text("│ ").append(Text("> Optimization Results", style="bold green")))
|
|
590
734
|
console.print(Text("│"))
|
|
591
735
|
|
|
592
736
|
# Show score improvement
|
|
593
737
|
if best_score > initial_score:
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
738
|
+
from ..reporting_utils import safe_percentage_change
|
|
739
|
+
|
|
740
|
+
perc_change, has_percentage = safe_percentage_change(best_score, initial_score)
|
|
741
|
+
if has_percentage:
|
|
742
|
+
console.print(
|
|
743
|
+
Text("│ ").append(
|
|
744
|
+
Text(
|
|
745
|
+
f"Prompt improved from {initial_score:.4f} to {best_score:.4f} ({perc_change:.2%})",
|
|
746
|
+
style="green",
|
|
747
|
+
)
|
|
748
|
+
)
|
|
749
|
+
)
|
|
750
|
+
else:
|
|
751
|
+
console.print(
|
|
752
|
+
Text("│ ").append(
|
|
753
|
+
Text(
|
|
754
|
+
f"Prompt improved from {initial_score:.4f} to {best_score:.4f}",
|
|
755
|
+
style="green",
|
|
756
|
+
)
|
|
757
|
+
)
|
|
599
758
|
)
|
|
600
|
-
)
|
|
601
759
|
else:
|
|
602
760
|
console.print(
|
|
603
|
-
Text(
|
|
761
|
+
Text("│ ").append(
|
|
762
|
+
Text(f"No improvement found (score: {best_score:.4f})", style="yellow")
|
|
763
|
+
)
|
|
604
764
|
)
|
|
605
765
|
|
|
606
766
|
console.print(Text("│"))
|
|
607
|
-
console.print(Text("│ Prompt Changes:", style="cyan"))
|
|
767
|
+
console.print(Text("│ ").append(Text("Prompt Changes:", style="cyan")))
|
|
608
768
|
console.print(Text("│"))
|
|
609
769
|
|
|
610
|
-
#
|
|
611
|
-
|
|
612
|
-
initial_msg = initial_messages[idx] if idx < len(initial_messages) else None
|
|
613
|
-
optimized_msg = (
|
|
614
|
-
optimized_messages[idx] if idx < len(optimized_messages) else None
|
|
615
|
-
)
|
|
770
|
+
# Compute diff items using the extracted function
|
|
771
|
+
diff_items = compute_message_diff_order(initial_messages, optimized_messages)
|
|
616
772
|
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
if
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
# Handle added messages
|
|
628
|
-
if not initial_msg:
|
|
629
|
-
console.print(Text(f"│ {role}: (added)", style="green bold"))
|
|
630
|
-
for line in optimized_content.splitlines():
|
|
631
|
-
console.print(Text(f"│ +{line}", style="green"))
|
|
773
|
+
# Display each diff item
|
|
774
|
+
for item in diff_items:
|
|
775
|
+
if item.change_type == "added":
|
|
776
|
+
# Role was added
|
|
777
|
+
console.print(
|
|
778
|
+
Text("│ ").append(Text(f"{item.role}: (added)", style="green bold"))
|
|
779
|
+
)
|
|
780
|
+
assert item.optimized_content is not None
|
|
781
|
+
for line in item.optimized_content.splitlines():
|
|
782
|
+
console.print(Text("│ ").append(Text(f"+{line}", style="green")))
|
|
632
783
|
console.print(Text("│"))
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
784
|
+
elif item.change_type == "removed":
|
|
785
|
+
# Role was removed
|
|
786
|
+
console.print(
|
|
787
|
+
Text("│ ").append(Text(f"{item.role}: (removed)", style="red bold"))
|
|
788
|
+
)
|
|
789
|
+
assert item.initial_content is not None
|
|
790
|
+
for line in item.initial_content.splitlines():
|
|
791
|
+
console.print(Text("│ ").append(Text(f"-{line}", style="red")))
|
|
640
792
|
console.print(Text("│"))
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
initial_content.splitlines(keepends=False),
|
|
653
|
-
optimized_content.splitlines(keepends=False),
|
|
654
|
-
lineterm="",
|
|
655
|
-
n=3, # 3 lines of context
|
|
793
|
+
elif item.change_type == "unchanged":
|
|
794
|
+
# No changes
|
|
795
|
+
console.print(
|
|
796
|
+
Text("│ ").append(Text(f"{item.role}: (unchanged)", style="dim"))
|
|
797
|
+
)
|
|
798
|
+
else: # changed
|
|
799
|
+
# Content changed - show diff
|
|
800
|
+
console.print(
|
|
801
|
+
Text("│ ").append(
|
|
802
|
+
Text(f"{item.role}: (changed)", style="cyan bold")
|
|
803
|
+
)
|
|
656
804
|
)
|
|
657
|
-
)
|
|
658
805
|
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
#
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
806
|
+
assert item.initial_content is not None
|
|
807
|
+
assert item.optimized_content is not None
|
|
808
|
+
|
|
809
|
+
# Generate unified diff
|
|
810
|
+
diff_lines = list(
|
|
811
|
+
difflib.unified_diff(
|
|
812
|
+
item.initial_content.splitlines(keepends=False),
|
|
813
|
+
item.optimized_content.splitlines(keepends=False),
|
|
814
|
+
lineterm="",
|
|
815
|
+
n=3, # 3 lines of context
|
|
816
|
+
)
|
|
817
|
+
)
|
|
818
|
+
|
|
819
|
+
if diff_lines:
|
|
820
|
+
# Create diff content
|
|
821
|
+
diff_content = Text()
|
|
822
|
+
for line in diff_lines[3:]: # Skip first 3 lines (---, +++, @@)
|
|
823
|
+
if line.startswith("+"):
|
|
824
|
+
diff_content.append("│ " + line + "\n", style="green")
|
|
825
|
+
elif line.startswith("-"):
|
|
826
|
+
diff_content.append("│ " + line + "\n", style="red")
|
|
827
|
+
elif line.startswith("@@"):
|
|
828
|
+
diff_content.append("│ " + line + "\n", style="cyan dim")
|
|
829
|
+
else:
|
|
830
|
+
# Context line
|
|
831
|
+
diff_content.append("│ " + line + "\n", style="dim")
|
|
832
|
+
|
|
833
|
+
console.print(diff_content)
|
|
834
|
+
console.print(Text("│"))
|