opik-optimizer 1.0.6__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. opik_optimizer/__init__.py +4 -0
  2. opik_optimizer/_throttle.py +2 -1
  3. opik_optimizer/base_optimizer.py +402 -28
  4. opik_optimizer/data/context7_eval.jsonl +3 -0
  5. opik_optimizer/datasets/context7_eval.py +90 -0
  6. opik_optimizer/datasets/tiny_test.py +33 -34
  7. opik_optimizer/datasets/truthful_qa.py +2 -2
  8. opik_optimizer/evolutionary_optimizer/crossover_ops.py +194 -0
  9. opik_optimizer/evolutionary_optimizer/evaluation_ops.py +136 -0
  10. opik_optimizer/evolutionary_optimizer/evolutionary_optimizer.py +289 -966
  11. opik_optimizer/evolutionary_optimizer/helpers.py +10 -0
  12. opik_optimizer/evolutionary_optimizer/llm_support.py +136 -0
  13. opik_optimizer/evolutionary_optimizer/mcp.py +249 -0
  14. opik_optimizer/evolutionary_optimizer/mutation_ops.py +306 -0
  15. opik_optimizer/evolutionary_optimizer/population_ops.py +228 -0
  16. opik_optimizer/evolutionary_optimizer/prompts.py +352 -0
  17. opik_optimizer/evolutionary_optimizer/reporting.py +28 -4
  18. opik_optimizer/evolutionary_optimizer/style_ops.py +86 -0
  19. opik_optimizer/few_shot_bayesian_optimizer/few_shot_bayesian_optimizer.py +90 -81
  20. opik_optimizer/few_shot_bayesian_optimizer/reporting.py +12 -5
  21. opik_optimizer/gepa_optimizer/__init__.py +3 -0
  22. opik_optimizer/gepa_optimizer/adapter.py +154 -0
  23. opik_optimizer/gepa_optimizer/gepa_optimizer.py +653 -0
  24. opik_optimizer/gepa_optimizer/reporting.py +181 -0
  25. opik_optimizer/logging_config.py +42 -7
  26. opik_optimizer/mcp_utils/__init__.py +22 -0
  27. opik_optimizer/mcp_utils/mcp.py +541 -0
  28. opik_optimizer/mcp_utils/mcp_second_pass.py +152 -0
  29. opik_optimizer/mcp_utils/mcp_simulator.py +116 -0
  30. opik_optimizer/mcp_utils/mcp_workflow.py +547 -0
  31. opik_optimizer/meta_prompt_optimizer/meta_prompt_optimizer.py +470 -134
  32. opik_optimizer/meta_prompt_optimizer/reporting.py +16 -2
  33. opik_optimizer/mipro_optimizer/_lm.py +30 -23
  34. opik_optimizer/mipro_optimizer/_mipro_optimizer_v2.py +52 -51
  35. opik_optimizer/mipro_optimizer/mipro_optimizer.py +126 -46
  36. opik_optimizer/mipro_optimizer/utils.py +2 -4
  37. opik_optimizer/optimizable_agent.py +21 -16
  38. opik_optimizer/optimization_config/chat_prompt.py +44 -23
  39. opik_optimizer/optimization_config/configs.py +3 -3
  40. opik_optimizer/optimization_config/mappers.py +9 -8
  41. opik_optimizer/optimization_result.py +22 -14
  42. opik_optimizer/reporting_utils.py +61 -10
  43. opik_optimizer/task_evaluator.py +9 -8
  44. opik_optimizer/utils/__init__.py +15 -0
  45. opik_optimizer/utils/colbert.py +236 -0
  46. opik_optimizer/{utils.py → utils/core.py} +160 -33
  47. opik_optimizer/utils/dataset_utils.py +49 -0
  48. opik_optimizer/utils/prompt_segments.py +186 -0
  49. opik_optimizer-2.0.0.dist-info/METADATA +345 -0
  50. opik_optimizer-2.0.0.dist-info/RECORD +74 -0
  51. opik_optimizer-2.0.0.dist-info/licenses/LICENSE +203 -0
  52. opik_optimizer-1.0.6.dist-info/METADATA +0 -181
  53. opik_optimizer-1.0.6.dist-info/RECORD +0 -50
  54. opik_optimizer-1.0.6.dist-info/licenses/LICENSE +0 -21
  55. {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/WHEEL +0 -0
  56. {opik_optimizer-1.0.6.dist-info → opik_optimizer-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,541 @@
1
+ """Helpers for working with MCP tool signatures in optimization flows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import copy
7
+ import importlib
8
+ import json
9
+ import textwrap
10
+ from difflib import SequenceMatcher
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from types import TracebackType
14
+ from typing import (
15
+ Any,
16
+ TypeVar,
17
+ cast,
18
+ )
19
+ from collections.abc import Coroutine, Iterable, Mapping
20
+
21
# Placeholders for the optional MCP SDK entry points. They start out as ``None``
# and are rebound by the SDK bootstrap further down in this module.
ClientSession: type[Any] | None = None
StdioClientFactory: type[Any] | None = None
StdioServerParameters: type[Any] | None = None
types_mod: Any | None = None

# Result type carried through ``run_sync``.
_T = TypeVar("_T")
27
+
28
+
29
# Key under which a tool entry stores its function definition.
TOOL_ENTRY_KEY = "function"


@dataclass
class ToolSignature:
    """Representation of an MCP tool definition used for tuning."""

    # Tool name as exposed to the model.
    name: str
    # Free-text description; this is the text exposed via ``segment_update``.
    description: str
    # Parameter schema mapping (read for "required" / "properties" elsewhere).
    parameters: Mapping[str, Any]
    # Optional usage examples carried alongside the function block.
    examples: list[dict[str, Any]] | None = None
    # Any top-level keys of the entry other than the function block.
    extra: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_tool_entry(cls, entry: Mapping[str, Any]) -> ToolSignature:
        """Build a signature from a tool entry holding a ``function`` block.

        Raises:
            ValueError: If the function block or the function name is missing.
        """
        if TOOL_ENTRY_KEY not in entry:
            raise ValueError("Tool entry missing 'function' block")

        function_block = entry[TOOL_ENTRY_KEY]
        name = function_block.get("name")
        if not name:
            raise ValueError("Tool entry missing function name")

        # An explicit ``"description": null`` must not leak ``None`` into a
        # field that is declared (and later formatted) as a string.
        description = function_block.get("description") or ""
        parameters = function_block.get("parameters", {}) or {}
        examples = function_block.get("examples")
        extra = {
            key: value for key, value in entry.items() if key not in {TOOL_ENTRY_KEY}
        }

        return cls(
            name=name,
            description=description,
            parameters=parameters,
            examples=examples,
            extra=extra,
        )

    def to_tool_entry(self) -> dict[str, Any]:
        """Return a tool entry dict that shares no mutable state with ``self``."""
        entry = copy.deepcopy(self.extra)
        # Deep-copy the nested structures as well, so that edits made to the
        # exported entry cannot silently alter this signature.
        function_block: dict[str, Any] = {
            "name": self.name,
            "description": self.description,
            "parameters": copy.deepcopy(self.parameters),
        }
        if self.examples is not None:
            function_block["examples"] = copy.deepcopy(self.examples)
        entry[TOOL_ENTRY_KEY] = function_block
        return entry

    def segment_update(self) -> tuple[str, str]:
        """Return the ``("tool:<name>", description)`` pair for this tool."""
        return (f"tool:{self.name}", self.description)
84
+
85
+
86
def load_mcp_signature(path: Path) -> list[ToolSignature]:
    """Load tool signatures from a JSON file.

    The file may hold either a list of tool entries or an object whose
    ``"tools"`` key holds that list.

    Raises:
        ValueError: If the payload does not resolve to a list.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # encoding, which would garble non-ASCII descriptions on some systems.
    data = json.loads(Path(path).read_text(encoding="utf-8"))

    if isinstance(data, dict) and "tools" in data:
        data = data["tools"]

    if not isinstance(data, list):
        raise ValueError("MCP signature file must contain a list of tools")

    return [ToolSignature.from_tool_entry(entry) for entry in data]
96
+
97
+
98
def dump_mcp_signature(signatures: Iterable[ToolSignature], path: Path) -> None:
    """Serialize ``signatures`` to ``path`` as an indented, key-sorted JSON list."""
    entries = []
    for signature in signatures:
        entries.append(signature.to_tool_entry())
    rendered = json.dumps(entries, indent=2, sort_keys=True)
    Path(path).write_text(rendered)
101
+
102
+
103
def tools_from_signatures(signatures: Iterable[ToolSignature]) -> list[dict[str, Any]]:
    """Convert each signature into its tool-entry dict, preserving order."""
    tool_entries: list[dict[str, Any]] = []
    for signature in signatures:
        tool_entries.append(signature.to_tool_entry())
    return tool_entries
105
+
106
+
107
def signature_updates(signatures: Iterable[ToolSignature]) -> dict[str, str]:
    """Collect every signature's ``segment_update()`` pair into one dict."""
    pairs = (signature.segment_update() for signature in signatures)
    return {segment_id: text for segment_id, text in pairs}
109
+
110
+
111
def validate_tool_arguments(
    signature: ToolSignature, arguments: Mapping[str, Any]
) -> tuple[bool, str]:
    """Validate ``arguments`` against required fields in the signature schema.

    Checks that every name listed under ``required`` is present, then checks
    the primitive ``type`` (string / number / integer / boolean) of each
    argument that has a schema under ``properties``. Arguments without a
    schema, and schema types other than those four, are not checked.

    Returns:
        ``(True, "")`` when valid, otherwise ``(False, <reason>)`` for the
        first problem found.
    """
    # Tolerate schemas that carry explicit nulls instead of omitting the keys.
    schema = signature.parameters or {}

    for required_field in schema.get("required") or []:
        if required_field not in arguments:
            return False, f"Missing required argument '{required_field}'"

    properties = schema.get("properties") or {}
    for key, value in arguments.items():
        prop_schema = properties.get(key)
        if not prop_schema:
            continue
        expected_type = prop_schema.get("type")
        if not expected_type:
            continue

        # ``bool`` is a subclass of ``int`` in Python, so it has to be excluded
        # explicitly: a JSON boolean is neither a number nor an integer.
        is_bool = isinstance(value, bool)
        if expected_type == "string" and not isinstance(value, str):
            return False, f"Argument '{key}' must be a string"
        if expected_type == "number" and (
            is_bool or not isinstance(value, (int, float))
        ):
            return False, f"Argument '{key}' must be a number"
        if expected_type == "integer" and (is_bool or not isinstance(value, int)):
            return False, f"Argument '{key}' must be an integer"
        if expected_type == "boolean" and not is_bool:
            return False, f"Argument '{key}' must be a boolean"

    return True, ""
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # MCP runtime helpers using the official Python SDK
142
+
143
+
144
class MCPDependencyError(RuntimeError):
    """Signals that the Model Context Protocol SDK could not be loaded."""
146
+
147
+
148
def _load_sdk() -> tuple[Any, Any, Any, Any]:
    """Locate the MCP SDK under either of its known package names.

    Returns:
        ``(ClientSession, stdio_client, StdioServerParameters, types module)``
        from the first package layout that imports and exposes all three names.

    Raises:
        MCPDependencyError: If no candidate layout is usable.
    """
    module_layouts = (
        ("mcp.client.session", "mcp.client.stdio", "mcp.types"),
        (
            "modelcontextprotocol.client.session",
            "modelcontextprotocol.client.stdio",
            "modelcontextprotocol.types",
        ),
    )

    for layout in module_layouts:
        try:
            session_module, stdio_module, sdk_types = [
                importlib.import_module(module_path) for module_path in layout
            ]
        except ImportError:
            # This layout is not installed; try the next one.
            continue

        entry_points = (
            getattr(session_module, "ClientSession", None),
            getattr(stdio_module, "stdio_client", None),
            getattr(stdio_module, "StdioServerParameters", None),
        )
        if all(entry_points):
            return (*entry_points, sdk_types)

    raise MCPDependencyError(
        "modelcontextprotocol Python SDK not found. Install it with 'pip install mcp'."
    )
180
+
181
+
182
# Resolve the SDK once at import time. A missing SDK is not fatal here: the
# failure is remembered in ``_SDK_ERROR`` so that it can be reported later,
# when an ``MCPClient`` is actually constructed.
try:
    (ClientSession, StdioClientFactory, StdioServerParameters, types_mod) = _load_sdk()
    _SDK_ERROR: Exception | None = None
except MCPDependencyError as exc:  # pragma: no cover
    ClientSession = None  # type: ignore[assignment]
    StdioClientFactory = None  # type: ignore[assignment]
    StdioServerParameters = None  # type: ignore[assignment]
    types_mod = None  # type: ignore[assignment]
    _SDK_ERROR = exc
191
+
192
+
193
@dataclass
class MCPManifest:
    """Launch description for an MCP server process."""

    # Display name; ``from_dict`` falls back to "mcp-server".
    name: str
    # Executable used to start the server.
    command: str
    # Command-line arguments passed to ``command``.
    args: list[str]
    # Environment variables for the server process.
    env: dict[str, str]

    @classmethod
    def from_dict(cls, data: Mapping[str, Any]) -> MCPManifest:
        """Build a manifest from a mapping.

        Missing or null ``args`` / ``env`` become an empty list / dict, and
        both are copied so the manifest does not alias the caller's data.

        Raises:
            ValueError: If ``command`` is missing or empty.
        """
        command = data.get("command")
        if not command:
            raise ValueError("mcp.json missing 'command'")
        return cls(
            name=data.get("name", "mcp-server"),
            command=command,
            args=list(data.get("args") or []),
            env=dict(data.get("env") or {}),
        )

    @classmethod
    def from_json(cls, path: Path) -> MCPManifest:
        """Read a manifest from a JSON file, decoded as UTF-8."""
        return cls.from_dict(json.loads(Path(path).read_text(encoding="utf-8")))
215
+
216
+
217
class MCPClient:
    """Async context manager wrapping an MCP stdio transport and session."""

    def __init__(self, manifest: MCPManifest) -> None:
        """Store the manifest; fail fast when the SDK could not be loaded.

        Raises:
            MCPDependencyError: If the SDK import failed or any SDK entry
                point is unavailable.
        """
        if _SDK_ERROR is not None:
            raise MCPDependencyError(str(_SDK_ERROR))
        if (
            ClientSession is None
            or StdioClientFactory is None
            or StdioServerParameters is None
        ):
            raise MCPDependencyError("MCP SDK is not available")
        self.manifest = manifest
        self._transport_cm: Any | None = None
        self._session: Any | None = None
        self._read_stream: Any | None = None
        self._write_stream: Any | None = None

    async def __aenter__(self) -> MCPClient:
        """Open the stdio transport, then start and initialize the session.

        If anything fails after the transport has been opened, whatever was
        already opened is closed again before the error propagates.
        """
        server_params = cast(type[Any], StdioServerParameters)(
            command=self.manifest.command,
            args=self.manifest.args,
            env=self.manifest.env or None,
        )

        transport_factory = cast(type[Any], StdioClientFactory)
        transport_cm = transport_factory(server_params)
        self._read_stream, self._write_stream = await transport_cm.__aenter__()
        # Only remember the transport once it is entered, so cleanup never
        # tries to exit a context manager that was not opened.
        self._transport_cm = transport_cm

        try:
            session_cls = cast(type[Any], ClientSession)
            session = session_cls(self._read_stream, self._write_stream)
            if hasattr(session, "__aenter__"):
                await session.__aenter__()
            self._session = session

            if hasattr(session, "initialize"):
                await session.initialize()
        except BaseException as err:
            # ``async with`` does not call ``__aexit__`` when ``__aenter__``
            # raises, so release the opened resources here.
            await self.__aexit__(type(err), err, err.__traceback__)
            raise
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> bool | None:
        """Close the session and then the transport; never suppress errors."""
        session, transport_cm = self._session, self._transport_cm
        # Drop the references first so a closed client reports
        # "MCP session not started" instead of using stale handles.
        self._session = None
        self._transport_cm = None
        self._read_stream = None
        self._write_stream = None
        try:
            if session is not None and hasattr(session, "__aexit__"):
                await session.__aexit__(exc_type, exc, tb)
        finally:
            # Close the transport even when closing the session failed.
            if transport_cm is not None:
                await transport_cm.__aexit__(exc_type, exc, tb)
        return None

    async def list_tools(self) -> Any:
        """Return the tools advertised by the session.

        Prefers ``session.list_tools()`` (unwrapping a ``tools`` attribute when
        the response has one) and falls back to ``session.tools()``.

        Raises:
            RuntimeError: If the session is not started or offers neither call.
        """
        if self._session is None:
            raise RuntimeError("MCP session not started")
        if hasattr(self._session, "list_tools"):
            response = await self._session.list_tools()
            return getattr(response, "tools", response)
        if hasattr(self._session, "tools"):
            return await self._session.tools()
        raise RuntimeError("MCP session missing list_tools")

    async def get_tool(self, tool_name: str) -> Any:
        """Return the listed tool whose ``name`` equals ``tool_name``.

        Raises:
            ValueError: If no listed tool has that name.
        """
        tools = await self.list_tools()
        for tool in tools:
            if tool.name == tool_name:
                return tool
        raise ValueError(f"Tool '{tool_name}' not found")

    async def call_tool(self, tool_name: str, arguments: Mapping[str, Any]) -> Any:
        """Invoke ``tool_name`` on the session with ``arguments``.

        Raises:
            RuntimeError: If the session is not started.
        """
        if self._session is None:
            raise RuntimeError("MCP session not started")
        return await self._session.call_tool(name=tool_name, arguments=arguments)
288
+
289
+
290
def run_sync(coro: Coroutine[Any, Any, _T]) -> _T:
    """Drive ``coro`` to completion with ``asyncio.run`` and return its result."""
    outcome = asyncio.run(coro)
    return outcome
292
+
293
+
294
def list_tools_from_manifest(manifest: MCPManifest) -> Any:
    """Open a short-lived client for ``manifest`` and return its tool listing."""

    async def _fetch_tools() -> Any:
        async with MCPClient(manifest) as session:
            listing = await session.list_tools()
            return listing

    return run_sync(_fetch_tools())
300
+
301
+
302
def call_tool_from_manifest(
    manifest: MCPManifest, tool_name: str, arguments: dict[str, Any]
) -> Any:
    """Open a short-lived client for ``manifest`` and invoke one tool call."""

    async def _invoke() -> Any:
        async with MCPClient(manifest) as session:
            outcome = await session.call_tool(tool_name, arguments)
            return outcome

    return run_sync(_invoke())
310
+
311
+
312
def response_to_text(response: object) -> str:
    """Flatten a tool response into plain text.

    Resolution order: the non-empty ``text`` attributes of a list-valued
    ``content`` joined by newlines; otherwise ``str(content)``; otherwise
    ``str(output)``; otherwise ``str(response)``.
    """
    if not hasattr(response, "content"):
        if hasattr(response, "output"):
            return str(getattr(response, "output"))
        return str(response)

    payload = getattr(response, "content")
    if isinstance(payload, list):
        fragments = [
            fragment
            for fragment in (getattr(part, "text", None) for part in payload)
            if fragment
        ]
        if fragments:
            return "\n".join(fragments)
    # Non-list content, or a list without any usable text items.
    return str(payload)
327
+
328
+
329
# Markers that delimit the tool description inside a generated system prompt.
PROMPT_TOOL_HEADER = "<<TOOL_DESCRIPTION>>"
PROMPT_TOOL_FOOTER = "<<END_TOOL_DESCRIPTION>>"

# System-prompt scaffolding below is inspired by the MCP section of Cline's
# system prompt (Apache-2.0). See https://github.com/cline/cline for details.
TOOL_USE_GUIDELINES = textwrap.dedent(
    """
    # Tool Use Guidelines

    1. In <thinking> tags, decide what you already know and what information you still need.
    2. Choose the best tool for the current step using the descriptions and schemas provided.
    3. Use one tool call per message, wait for its result, then decide the next step.
    4. Format tool calls exactly with the XML shown in the tool examples.
    5. After each tool call, read the result carefully before responding or calling another tool.
    6. Always incorporate the tool output into your final answer.
    """
).strip()


def _format_json_block(data: Mapping[str, Any]) -> str:
    """Render ``data`` as single-line JSON with sorted keys."""
    return json.dumps(data, sort_keys=True)


def _sanitize_command_args(args: Iterable[str]) -> list[str]:
    """Mask command-line tokens that look like credentials."""
    secret_flags = {"--api-key", "--apikey", "--token"}
    secret_keywords = ("key", "token", "secret")

    sanitized: list[str] = []
    drop_next = False
    for token in args:
        if drop_next:
            # This token is the value of a secret flag; it was already
            # replaced by the "***" appended together with the flag.
            drop_next = False
            continue

        lowered = token.lower()
        if lowered in secret_flags:
            sanitized.append(f"{token} ***")
            drop_next = True
        elif any(keyword in lowered for keyword in secret_keywords):
            sanitized.append("***")
        else:
            sanitized.append(token)
    return sanitized


def system_prompt_from_tool(
    signature: ToolSignature, manifest: MCPManifest | None = None
) -> str:
    """Build the system prompt that presents one MCP tool to the model.

    The prompt has up to three sections separated by blank lines: an optional
    MCP server header (only when ``manifest`` is given, with secret-looking
    command-line arguments masked), ``TOOL_USE_GUIDELINES``, and a body that
    wraps the tool description in ``PROMPT_TOOL_HEADER`` /
    ``PROMPT_TOOL_FOOTER`` and lists the parameters.

    The sections are assembled line by line rather than by dedenting an
    f-string: a multi-line description or parameter list interpolated into an
    indented template would stop ``textwrap.dedent`` from removing the
    template's indentation.
    """
    parameters = signature.parameters or {}
    parameter_lines = [
        f"- {name} ({schema.get('type', 'any')}): {schema.get('description', '')}"
        for name, schema in parameters.get("properties", {}).items()
    ]
    parameter_section = (
        "\n".join(parameter_lines) if parameter_lines else "- No structured parameters."
    )

    mcp_header = ""
    if manifest is not None:
        command_line_parts = [manifest.command]
        if manifest.args:
            command_line_parts.extend(_sanitize_command_args(manifest.args))
        command_line = " ".join(command_line_parts)

        schema_block = (
            _format_json_block(signature.parameters) if signature.parameters else "{}"
        )

        mcp_header = "\n".join(
            [
                "MCP SERVERS",
                "",
                "The Model Context Protocol (MCP) enables communication between the system and locally running MCP servers that provide additional tools and resources to extend your capabilities.",
                "",
                "# Connected MCP Servers",
                "",
                "When a server is connected, you can use the server's tools via the `use_mcp_tool` tool, and access the server's resources via the `access_mcp_resource` tool.",
                "",
                f"## {manifest.name} (`{command_line}`)",
                "",
                "### Available Tools",
                f"- {signature.name}: {signature.description}",
                "Input Schema:",
                schema_block,
            ]
        ).strip()

    body = "\n".join(
        [
            "You are an assistant that answers developer questions using the available MCP tool.",
            "Always decide whether the tool is required before answering.",
            "Always call the tool at least once before replying and incorporate the returned documentation into your answer (quote key terms, mention the library ID).",
            "",
            "Tool description:",
            PROMPT_TOOL_HEADER,
            f"{signature.description}",
            PROMPT_TOOL_FOOTER,
            "",
            "Tool parameters:",
            parameter_section,
            "When you call the tool, read its response carefully before replying.",
        ]
    ).strip()

    sections = [mcp_header, TOOL_USE_GUIDELINES, body]
    return "\n\n".join(section for section in sections if section).strip()
434
+
435
+
436
def extract_description_from_system(system_prompt: str) -> str | None:
    """Return the stripped text between the tool-description markers.

    Yields ``None`` unless both ``PROMPT_TOOL_HEADER`` and
    ``PROMPT_TOOL_FOOTER`` occur in ``system_prompt``.
    """
    header_at = system_prompt.find(PROMPT_TOOL_HEADER)
    footer_at = system_prompt.find(PROMPT_TOOL_FOOTER)
    if header_at < 0 or footer_at < 0:
        return None
    description_start = header_at + len(PROMPT_TOOL_HEADER)
    return system_prompt[description_start:footer_at].strip()
445
+
446
+
447
def load_tool_signature_from_manifest(
    manifest: MCPManifest, tool_name: str
) -> ToolSignature:
    """List the tools of the server in ``manifest`` and wrap the named one.

    The tool's dumped ``inputSchema`` becomes the signature parameters, and
    ``annotations["examples"]`` (when present) becomes its examples.

    Raises:
        ValueError: If the server lists no tool called ``tool_name``.
    """
    available = list_tools_from_manifest(manifest)
    matches = (
        candidate
        for candidate in available
        if getattr(candidate, "name", None) == tool_name
    )
    selected = next(matches, None)
    if selected is None:
        raise ValueError(f"Tool '{tool_name}' not found")

    dumped = selected.model_dump(by_alias=True)
    annotations = dumped.get("annotations") or {}
    function_block = {
        "name": dumped.get("name", tool_name),
        "description": dumped.get("description", ""),
        "parameters": dumped.get("inputSchema", {}),
        "examples": annotations.get("examples"),
    }
    return ToolSignature.from_tool_entry(
        {"type": "function", "function": function_block}
    )
470
+
471
+
472
def score_query_tool(
    manifest: MCPManifest,
    tool_name: str,
    dataset: Iterable[Mapping[str, Any]],
    description: str,
    argument_key: str = "query",
) -> float:
    """Score ``description`` by how often gated tool calls match the reference.

    Every record with a non-empty ``argument_key`` argument counts towards the
    total. The tool is only called when the argument shares at least one
    lower-cased, whitespace-split token with ``description``. A call succeeds
    when the whitespace-normalized, lower-cased response is at least 0.6
    similar (``SequenceMatcher`` ratio) to the record's reference answer.

    Returns:
        ``successes / total``, or ``0.0`` when no record was counted.
    """

    def _normalize(text: str) -> str:
        return " ".join(text.lower().split())

    keywords = set(description.lower().split())
    counted = 0
    matched = 0
    for row in dataset:
        call_args = row.get("arguments", {})
        query_text = call_args.get(argument_key, "")
        if not query_text:
            continue
        counted += 1

        # The description must overlap the query before the tool is tried.
        if keywords.isdisjoint(set(query_text.lower().split())):
            continue

        reply = response_to_text(
            call_tool_from_manifest(manifest, tool_name, call_args)
        )
        expected = row.get("reference_answer") or row.get(
            "expected_answer_contains", ""
        )
        if not expected:
            continue
        similarity = SequenceMatcher(
            None, _normalize(expected), _normalize(reply)
        ).ratio()
        if similarity >= 0.6:
            matched += 1

    if not counted:
        return 0.0
    return matched / counted
505
+
506
+
507
def score_url_tool(
    manifest: MCPManifest,
    tool_name: str,
    dataset: Iterable[Mapping[str, Any]],
    description: str,
    argument_key: str = "url",
) -> float:
    """Score ``description`` by how often gated URL tool calls match the reference.

    Every record with a non-empty ``argument_key`` argument counts towards the
    total. The tool is only called when a dot-separated label of the URL's
    lower-cased host appears among the lower-cased, whitespace-split tokens of
    ``description``. A call succeeds when the whitespace-normalized,
    lower-cased response is at least 0.6 similar (``SequenceMatcher`` ratio)
    to the record's reference answer.

    Returns:
        ``successes / total``, or ``0.0`` when no record was counted.
    """
    from urllib.parse import urlparse

    def _normalize(text: str) -> str:
        return " ".join(text.lower().split())

    keywords = set(description.lower().split())
    counted = 0
    matched = 0
    for row in dataset:
        call_args = row.get("arguments", {})
        target_url = call_args.get(argument_key, "")
        if not target_url:
            continue
        counted += 1

        # The description must mention part of the host before the tool is tried.
        host_labels = set(urlparse(target_url).netloc.lower().split("."))
        if keywords.isdisjoint(host_labels):
            continue

        reply = response_to_text(
            call_tool_from_manifest(manifest, tool_name, call_args)
        )
        expected = row.get("reference_answer") or row.get(
            "expected_answer_contains", ""
        )
        if not expected:
            continue
        similarity = SequenceMatcher(
            None, _normalize(expected), _normalize(reply)
        ).ratio()
        if similarity >= 0.6:
            matched += 1

    if not counted:
        return 0.0
    return matched / counted
@@ -0,0 +1,152 @@
1
+ """Utilities for coordinating multi-pass MCP prompt evaluations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from contextvars import ContextVar
7
+ from typing import Any, Optional
8
+ from collections.abc import Callable
9
+
10
+
11
# Callback that receives ``(dataset_item, summary)`` and returns the follow-up
# user message, or ``None`` when no follow-up should be added.
FollowUpBuilder = Callable[[dict[str, Any], str], Optional[str]]
12
+
13
+
14
def _insert_tool_message(
    *,
    messages: list[dict[str, Any]],
    tool_name: str,
    tool_content: str,
) -> list[dict[str, Any]]:
    """Return a new message list with the tool result placed after the first assistant reply.

    The tool result is added as an ``assistant`` message. When ``messages``
    holds no assistant message, it is appended at the end instead. The input
    list itself is not modified.
    """
    first_reply_at = next(
        (
            position
            for position, message in enumerate(messages)
            if message.get("role") == "assistant"
        ),
        None,
    )

    if first_reply_at is None:
        logger.debug("No assistant message found; appending summary for %s", tool_name)
        trailing_note = {
            "role": "assistant",
            "content": (f"Tool result from `{tool_name}`:\n\n{tool_content}"),
        }
        return [*messages, trailing_note]

    logger.debug("Inserting tool summary for %s after assistant message", tool_name)
    inline_note = {
        "role": "assistant",
        "content": (f"Here is the result from tool `{tool_name}`:\n\n{tool_content}"),
    }
    split_at = first_reply_at + 1
    return [*messages[:split_at], inline_note, *messages[split_at:]]
50
+
51
+
52
def extract_user_query(dataset_item: dict[str, Any]) -> str | None:
    """Pull a user query out of a dataset item on a best-effort basis.

    A truthy top-level ``user_query`` wins. Otherwise, when ``input`` is a
    dict, the first non-blank string among its ``query``, ``user_query`` and
    ``prompt`` keys is returned. ``None`` means nothing usable was found.
    """
    direct = dataset_item.get("user_query")
    if direct:
        return direct

    nested = dataset_item.get("input")
    if not isinstance(nested, dict):
        return None

    candidates = (nested.get(key) for key in ("query", "user_query", "prompt"))
    return next(
        (
            candidate
            for candidate in candidates
            if isinstance(candidate, str) and candidate.strip()
        ),
        None,
    )
67
+
68
+
69
class MCPSecondPassCoordinator:
    """Keeps track of MCP tool summaries and assembles second-pass messages.

    TODO(opik-mcp): Replace this shim once optimizers understand multi-pass flows
    natively and expose tool transcripts without direct ChatPrompt mutation.
    """

    def __init__(
        self,
        *,
        tool_name: str,
        summary_var: ContextVar[str | None],
        follow_up_builder: FollowUpBuilder,
    ) -> None:
        """Bind the coordinator to a tool, a summary slot and a follow-up builder."""
        self._tool_name = tool_name
        self._summary_var = summary_var
        self._follow_up_builder = follow_up_builder
        # Values used by the most recent successful second-pass build.
        self._last_summary: str | None = None
        self._last_follow_up: str | None = None

    @property
    def tool_name(self) -> str:
        """Name of the tool this coordinator was created for."""
        return self._tool_name

    def reset(self) -> None:
        """Clear the pending summary held in the context variable."""
        self._summary_var.set(None)

    def record_summary(self, summary: str) -> None:
        """Store ``summary`` in the context variable for a later second pass."""
        logger.debug("Recording summary for %s", self.tool_name)
        self._summary_var.set(summary)

    def fetch_summary(self) -> str | None:
        """Return the summary currently held in the context variable."""
        return self._summary_var.get()

    def get_last_summary(self) -> str | None:
        """Return the summary used by the latest successful second-pass build."""
        return self._last_summary

    def build_second_pass_messages(
        self,
        *,
        base_messages: list[dict[str, Any]],
        dataset_item: dict[str, Any],
        summary_override: str | None = None,
    ) -> list[dict[str, Any]] | None:
        """Build the message list for a second pass, or ``None`` without a summary.

        Uses ``summary_override`` when it is not ``None``, otherwise the
        recorded summary. The summary is inserted into ``base_messages`` as a
        tool message, an optional follow-up user message is appended, and the
        recorded summary is cleared afterwards.
        """
        self._last_summary = None
        self._last_follow_up = None

        if summary_override is None:
            summary = self.fetch_summary()
        else:
            summary = summary_override

        if not summary:
            logger.debug(
                "No summary available for %s; skipping second pass", self.tool_name
            )
            return None

        logger.debug("Summary captured for %s", self.tool_name)
        # chr(10) is the newline character; the preview is kept on one line.
        preview = summary[:160].replace(chr(10), " ")
        logger.debug("Summary preview for %s: %s", self.tool_name, preview)

        second_pass = _insert_tool_message(
            messages=base_messages,
            tool_name=self.tool_name,
            tool_content=summary,
        )

        follow_up = self._follow_up_builder(dataset_item, summary)
        if follow_up:
            second_pass.append({"role": "user", "content": follow_up})
            logger.debug(
                "Follow-up appended for %s: %s",
                self.tool_name,
                follow_up[:120] if follow_up else "None",
            )

        self._last_summary = summary
        self._last_follow_up = follow_up
        self.reset()
        return second_pass
150
+
151
+
152
# Module-level logger. It is bound after the definitions above, which only
# look it up when they are called.
logger = logging.getLogger(__name__)