opik-optimizer 2.1.2__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opik_optimizer/__init__.py +2 -2
- opik_optimizer/base_optimizer.py +314 -145
- opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
- opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
- opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
- opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
- opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
- opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
- opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
- opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
- opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
- opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
- opik_optimizer/gepa_optimizer/reporting.py +164 -22
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +221 -245
- opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py +38 -14
- opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
- opik_optimizer/hierarchical_reflective_optimizer/reporting.py +287 -132
- opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
- opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
- opik_optimizer/mipro_optimizer/__init__.py +2 -2
- opik_optimizer/mipro_optimizer/_lm.py +4 -4
- opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
- opik_optimizer/mipro_optimizer/utils.py +1 -0
- opik_optimizer/multi_metric_objective.py +33 -0
- opik_optimizer/optimizable_agent.py +7 -4
- opik_optimizer/optimization_config/chat_prompt.py +7 -10
- opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
- opik_optimizer/parameter_optimizer/reporting.py +148 -0
- opik_optimizer/reporting_utils.py +42 -15
- opik_optimizer/task_evaluator.py +26 -9
- opik_optimizer/utils/core.py +16 -2
- opik_optimizer/utils/prompt_segments.py +1 -2
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +37 -37
- opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
- opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
- {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import asyncio
|
|
3
3
|
from typing import Any
|
|
4
|
-
from tqdm import tqdm
|
|
5
4
|
|
|
5
|
+
from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn
|
|
6
6
|
from opik.evaluation.evaluation_result import EvaluationResult
|
|
7
7
|
from .types import (
|
|
8
8
|
RootCauseAnalysis,
|
|
@@ -11,6 +11,7 @@ from .types import (
|
|
|
11
11
|
)
|
|
12
12
|
from . import reporting
|
|
13
13
|
from .prompts import BATCH_ANALYSIS_PROMPT, SYNTHESIS_PROMPT
|
|
14
|
+
from ..reporting_utils import get_console
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
16
17
|
|
|
@@ -285,13 +286,11 @@ Scores:
|
|
|
285
286
|
|
|
286
287
|
semaphore = asyncio.Semaphore(self.max_parallel_batches)
|
|
287
288
|
|
|
288
|
-
# Create progress bar for batch processing
|
|
289
|
-
|
|
290
|
-
total=len(batch_tasks), desc="Processing batches", unit="batch", leave=False
|
|
291
|
-
)
|
|
289
|
+
# Create progress bar for batch processing using Rich
|
|
290
|
+
console = get_console()
|
|
292
291
|
|
|
293
292
|
async def run_with_semaphore(
|
|
294
|
-
batch_num: int, task: Any
|
|
293
|
+
batch_num: int, task: Any, progress: Progress | None, task_id: Any | None
|
|
295
294
|
) -> tuple[int, BatchAnalysis]:
|
|
296
295
|
async with semaphore:
|
|
297
296
|
try:
|
|
@@ -300,19 +299,44 @@ Scores:
|
|
|
300
299
|
f"Completed batch {batch_num}: "
|
|
301
300
|
f"identified {len(result.failure_modes)} failure modes"
|
|
302
301
|
)
|
|
303
|
-
|
|
302
|
+
if progress and task_id is not None:
|
|
303
|
+
progress.update(task_id, advance=1) # Update progress bar
|
|
304
304
|
return batch_num, result
|
|
305
305
|
except Exception as exc:
|
|
306
306
|
logger.error(f"Batch {batch_num} failed: {exc}")
|
|
307
|
-
|
|
307
|
+
if progress and task_id is not None:
|
|
308
|
+
progress.update(
|
|
309
|
+
task_id, advance=1
|
|
310
|
+
) # Update progress bar even on error
|
|
308
311
|
raise
|
|
309
312
|
|
|
310
|
-
# Run all tasks with semaphore control
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
313
|
+
# Run all tasks with semaphore control and rich progress bar
|
|
314
|
+
if self.verbose >= 1:
|
|
315
|
+
with Progress(
|
|
316
|
+
TextColumn("│ "),
|
|
317
|
+
TextColumn("[progress.description]{task.description}"),
|
|
318
|
+
BarColumn(),
|
|
319
|
+
TaskProgressColumn(),
|
|
320
|
+
console=console,
|
|
321
|
+
transient=True,
|
|
322
|
+
) as progress:
|
|
323
|
+
task_id = progress.add_task(
|
|
324
|
+
"Processing batches", total=len(batch_tasks)
|
|
325
|
+
)
|
|
326
|
+
results = await asyncio.gather(
|
|
327
|
+
*[
|
|
328
|
+
run_with_semaphore(num, task, progress, task_id)
|
|
329
|
+
for num, task in batch_tasks
|
|
330
|
+
]
|
|
331
|
+
)
|
|
332
|
+
else:
|
|
333
|
+
# No progress bar in non-verbose mode
|
|
334
|
+
results = await asyncio.gather(
|
|
335
|
+
*[
|
|
336
|
+
run_with_semaphore(num, task, None, None)
|
|
337
|
+
for num, task in batch_tasks
|
|
338
|
+
]
|
|
339
|
+
)
|
|
316
340
|
|
|
317
341
|
# Sort by batch number to maintain order
|
|
318
342
|
batch_analyses = [result for _, result in sorted(results)]
|
|
@@ -14,6 +14,8 @@ TEST RESULTS:
|
|
|
14
14
|
{formatted_batch}
|
|
15
15
|
```
|
|
16
16
|
|
|
17
|
+
Important constraint: Base your analysis exclusively on the TEST RESULTS shown above. Do not infer, speculate, or hypothesize failure modes that are not directly evidenced in the provided results.
|
|
18
|
+
|
|
17
19
|
Think through the failures systematically:
|
|
18
20
|
|
|
19
21
|
1. IDENTIFY: List all distinct types of failures you observe in the test results
|
|
@@ -86,6 +88,10 @@ INSTRUCTIONS FOR IMPROVING THE PROMPT:
|
|
|
86
88
|
|
|
87
89
|
4. **Maintain Structure**: Keep the same message structure (role and content format). Only modify the content where necessary.
|
|
88
90
|
|
|
89
|
-
5. **
|
|
91
|
+
5. **Do NOT Add Messages**: Do not add new messages to the prompt. Only modify existing messages. The number of messages in the prompt must remain exactly the same.
|
|
92
|
+
|
|
93
|
+
6. **Be Specific**: Ensure your changes provide concrete, actionable guidance that directly addresses the identified failure mode.
|
|
94
|
+
|
|
95
|
+
Do not remove any variables or placeholders from any prompt message. You can reposition them within the same message content if needed but never remove them.
|
|
90
96
|
|
|
91
97
|
Provide your reasoning for the changes you made, explaining WHY each change addresses the failure mode, and then provide the improved prompt."""
|