opik-optimizer 2.1.2__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. opik_optimizer/__init__.py +2 -2
  2. opik_optimizer/base_optimizer.py +314 -145
  3. opik_optimizer/evolutionary_optimizer/crossover_ops.py +31 -4
  4. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +23 -3
  5. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +122 -95
  6. opik_optimizer/evolutionary_optimizer/mcp.py +11 -6
  7. opik_optimizer/evolutionary_optimizer/mutation_ops.py +25 -5
  8. opik_optimizer/evolutionary_optimizer/population_ops.py +26 -10
  9. opik_optimizer/evolutionary_optimizer/reporting.py +5 -5
  10. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +53 -99
  11. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +4 -4
  12. opik_optimizer/gepa_optimizer/gepa_optimizer.py +183 -172
  13. opik_optimizer/gepa_optimizer/reporting.py +164 -22
  14. opik_optimizer/hierarchical_reflective_optimizer/hierarchical_reflective_optimizer.py +221 -245
  15. opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py +38 -14
  16. opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
  17. opik_optimizer/hierarchical_reflective_optimizer/reporting.py +287 -132
  18. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +185 -205
  19. opik_optimizer/meta_prompt_optimizer/reporting.py +4 -4
  20. opik_optimizer/mipro_optimizer/__init__.py +2 -2
  21. opik_optimizer/mipro_optimizer/_lm.py +4 -4
  22. opik_optimizer/mipro_optimizer/{_mipro_optimizer_v2.py → mipro_optimizer_v2.py} +1 -7
  23. opik_optimizer/mipro_optimizer/utils.py +1 -0
  24. opik_optimizer/multi_metric_objective.py +33 -0
  25. opik_optimizer/optimizable_agent.py +7 -4
  26. opik_optimizer/optimization_config/chat_prompt.py +7 -10
  27. opik_optimizer/parameter_optimizer/parameter_optimizer.py +188 -40
  28. opik_optimizer/parameter_optimizer/reporting.py +148 -0
  29. opik_optimizer/reporting_utils.py +42 -15
  30. opik_optimizer/task_evaluator.py +26 -9
  31. opik_optimizer/utils/core.py +16 -2
  32. opik_optimizer/utils/prompt_segments.py +1 -2
  33. {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/METADATA +2 -3
  34. {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/RECORD +37 -37
  35. opik_optimizer/evolutionary_optimizer/llm_support.py +0 -136
  36. opik_optimizer/mipro_optimizer/mipro_optimizer.py +0 -680
  37. {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/WHEEL +0 -0
  38. {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/licenses/LICENSE +0 -0
  39. {opik_optimizer-2.1.2.dist-info → opik_optimizer-2.2.0.dist-info}/top_level.txt +0 -0
opik_optimizer/hierarchical_reflective_optimizer/hierarchical_root_cause_analyzer.py +38 -14
@@ -1,8 +1,8 @@
1
1
  import logging
2
2
  import asyncio
3
3
  from typing import Any
4
- from tqdm import tqdm
5
4
 
5
+ from rich.progress import Progress, TextColumn, BarColumn, TaskProgressColumn
6
6
  from opik.evaluation.evaluation_result import EvaluationResult
7
7
  from .types import (
8
8
  RootCauseAnalysis,
@@ -11,6 +11,7 @@ from .types import (
11
11
  )
12
12
  from . import reporting
13
13
  from .prompts import BATCH_ANALYSIS_PROMPT, SYNTHESIS_PROMPT
14
+ from ..reporting_utils import get_console
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
 
@@ -285,13 +286,11 @@ Scores:
285
286
 
286
287
  semaphore = asyncio.Semaphore(self.max_parallel_batches)
287
288
 
288
- # Create progress bar for batch processing
289
- pbar = tqdm(
290
- total=len(batch_tasks), desc="Processing batches", unit="batch", leave=False
291
- )
289
+ # Create progress bar for batch processing using Rich
290
+ console = get_console()
292
291
 
293
292
  async def run_with_semaphore(
294
- batch_num: int, task: Any
293
+ batch_num: int, task: Any, progress: Progress | None, task_id: Any | None
295
294
  ) -> tuple[int, BatchAnalysis]:
296
295
  async with semaphore:
297
296
  try:
@@ -300,19 +299,44 @@ Scores:
300
299
  f"Completed batch {batch_num}: "
301
300
  f"identified {len(result.failure_modes)} failure modes"
302
301
  )
303
- pbar.update(1) # Update progress bar
302
+ if progress and task_id is not None:
303
+ progress.update(task_id, advance=1) # Update progress bar
304
304
  return batch_num, result
305
305
  except Exception as exc:
306
306
  logger.error(f"Batch {batch_num} failed: {exc}")
307
- pbar.update(1) # Update progress bar even on error
307
+ if progress and task_id is not None:
308
+ progress.update(
309
+ task_id, advance=1
310
+ ) # Update progress bar even on error
308
311
  raise
309
312
 
310
- # Run all tasks with semaphore control
311
- results = await asyncio.gather(
312
- *[run_with_semaphore(num, task) for num, task in batch_tasks]
313
- )
314
-
315
- pbar.close() # Close progress bar
313
+ # Run all tasks with semaphore control and rich progress bar
314
+ if self.verbose >= 1:
315
+ with Progress(
316
+ TextColumn("│ "),
317
+ TextColumn("[progress.description]{task.description}"),
318
+ BarColumn(),
319
+ TaskProgressColumn(),
320
+ console=console,
321
+ transient=True,
322
+ ) as progress:
323
+ task_id = progress.add_task(
324
+ "Processing batches", total=len(batch_tasks)
325
+ )
326
+ results = await asyncio.gather(
327
+ *[
328
+ run_with_semaphore(num, task, progress, task_id)
329
+ for num, task in batch_tasks
330
+ ]
331
+ )
332
+ else:
333
+ # No progress bar in non-verbose mode
334
+ results = await asyncio.gather(
335
+ *[
336
+ run_with_semaphore(num, task, None, None)
337
+ for num, task in batch_tasks
338
+ ]
339
+ )
316
340
 
317
341
  # Sort by batch number to maintain order
318
342
  batch_analyses = [result for _, result in sorted(results)]
opik_optimizer/hierarchical_reflective_optimizer/prompts.py +7 -1
@@ -14,6 +14,8 @@ TEST RESULTS:
14
14
  {formatted_batch}
15
15
  ```
16
16
 
17
+ Important constraint: Base your analysis exclusively on the TEST RESULTS shown above. Do not infer, speculate, or hypothesize failure modes that are not directly evidenced in the provided results.
18
+
17
19
  Think through the failures systematically:
18
20
 
19
21
  1. IDENTIFY: List all distinct types of failures you observe in the test results
@@ -86,6 +88,10 @@ INSTRUCTIONS FOR IMPROVING THE PROMPT:
86
88
 
87
89
  4. **Maintain Structure**: Keep the same message structure (role and content format). Only modify the content where necessary.
88
90
 
89
- 5. **Be Specific**: Ensure your changes provide concrete, actionable guidance that directly addresses the identified failure mode.
91
+ 5. **Do NOT Add Messages**: Do not add new messages to the prompt. Only modify existing messages. The number of messages in the prompt must remain exactly the same.
92
+
93
+ 6. **Be Specific**: Ensure your changes provide concrete, actionable guidance that directly addresses the identified failure mode.
94
+
95
+ Do not remove any variables or placeholders from any prompt message. You can reposition them within the same message content if needed but never remove them.
90
96
 
91
97
  Provide your reasoning for the changes you made, explaining WHY each change addresses the failure mode, and then provide the improved prompt."""