deepeval 3.5.1__py3-none-any.whl → 3.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +94 -2
  3. deepeval/config/utils.py +54 -1
  4. deepeval/constants.py +27 -0
  5. deepeval/integrations/langchain/__init__.py +2 -3
  6. deepeval/integrations/langchain/callback.py +126 -301
  7. deepeval/integrations/langchain/patch.py +24 -13
  8. deepeval/integrations/langchain/utils.py +203 -1
  9. deepeval/integrations/pydantic_ai/patcher.py +220 -185
  10. deepeval/integrations/pydantic_ai/utils.py +86 -0
  11. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +1 -0
  12. deepeval/metrics/pii_leakage/pii_leakage.py +1 -1
  13. deepeval/models/embedding_models/azure_embedding_model.py +40 -9
  14. deepeval/models/embedding_models/local_embedding_model.py +54 -11
  15. deepeval/models/embedding_models/ollama_embedding_model.py +25 -7
  16. deepeval/models/embedding_models/openai_embedding_model.py +47 -5
  17. deepeval/models/llms/amazon_bedrock_model.py +31 -4
  18. deepeval/models/llms/anthropic_model.py +39 -13
  19. deepeval/models/llms/azure_model.py +37 -38
  20. deepeval/models/llms/deepseek_model.py +36 -7
  21. deepeval/models/llms/gemini_model.py +10 -0
  22. deepeval/models/llms/grok_model.py +50 -3
  23. deepeval/models/llms/kimi_model.py +37 -7
  24. deepeval/models/llms/local_model.py +38 -12
  25. deepeval/models/llms/ollama_model.py +15 -3
  26. deepeval/models/llms/openai_model.py +37 -44
  27. deepeval/models/mlllms/gemini_model.py +21 -3
  28. deepeval/models/mlllms/ollama_model.py +38 -13
  29. deepeval/models/mlllms/openai_model.py +18 -42
  30. deepeval/models/retry_policy.py +548 -64
  31. deepeval/prompt/api.py +13 -9
  32. deepeval/prompt/prompt.py +19 -9
  33. deepeval/tracing/tracing.py +87 -0
  34. deepeval/utils.py +12 -0
  35. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/METADATA +1 -1
  36. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/RECORD +39 -38
  37. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/LICENSE.md +0 -0
  38. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/WHEEL +0 -0
  39. {deepeval-3.5.1.dist-info → deepeval-3.5.3.dist-info}/entry_points.txt +0 -0
deepeval/integrations/pydantic_ai/patcher.py
@@ -12,6 +12,8 @@ from deepeval.confident.api import get_confident_api_key
 from deepeval.integrations.pydantic_ai.otel import instrument_pydantic_ai
 from deepeval.telemetry import capture_tracing_integration
 from deepeval.prompt import Prompt
+import inspect
+from contextvars import ContextVar
 
 try:
     from pydantic_ai.agent import Agent
@@ -26,11 +28,68 @@ try:
         ToolReturnPart,
         UserPromptPart,
     )
+    from pydantic_ai._run_context import RunContext
+    from deepeval.integrations.pydantic_ai.utils import (
+        extract_tools_called_from_llm_response,
+        extract_tools_called,
+        sanitize_run_context,
+    )
 
     pydantic_ai_installed = True
 except:
     pydantic_ai_installed = True
 
+_IN_RUN_SYNC = ContextVar("deepeval_in_run_sync", default=False)
+_INSTRUMENTED = False
+
+
+def instrument(otel: Optional[bool] = False, api_key: Optional[str] = None):
+    global _INSTRUMENTED
+    if api_key:
+        deepeval.login(api_key)
+
+    api_key = get_confident_api_key()
+
+    if not api_key:
+        raise ValueError("No api key provided.")
+
+    if otel:
+        instrument_pydantic_ai(api_key)
+    else:
+        with capture_tracing_integration("pydantic_ai"):
+            if _INSTRUMENTED:
+                return
+            _patch_agent_init()
+            _patch_agent_tool_decorator()
+            _INSTRUMENTED = True
+
+
+################### Init Patches ###################
+
+
+def _patch_agent_init():
+    original_init = Agent.__init__
+
+    @functools.wraps(original_init)
+    def wrapper(
+        *args,
+        llm_metric_collection: Optional[str] = None,
+        llm_metrics: Optional[List[BaseMetric]] = None,
+        llm_prompt: Optional[Prompt] = None,
+        agent_metric_collection: Optional[str] = None,
+        agent_metrics: Optional[List[BaseMetric]] = None,
+        **kwargs
+    ):
+        result = original_init(*args, **kwargs)
+        _patch_llm_model(
+            args[0]._model, llm_metric_collection, llm_metrics, llm_prompt
+        )  # runtime patch of the model
+        _patch_agent_run(args[0], agent_metric_collection, agent_metrics)
+        _patch_agent_run_sync(args[0], agent_metric_collection, agent_metrics)
+        return result
+
+    Agent.__init__ = wrapper
+
 
 def _patch_agent_tool_decorator():
     original_tool = Agent.tool
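Taken together, these hunks invert the module's structure: `instrument()` is now idempotent (guarded by `_INSTRUMENTED`), and per-agent patching happens inside `Agent.__init__`, so the deepeval-specific keyword arguments are consumed there rather than forwarded to pydantic-ai. A rough usage sketch under stated assumptions: a Confident API key is already configured (otherwise `instrument()` raises), and the model string and metric choice are illustrative only.

# Hypothetical usage sketch for the 3.5.3 entry point; imports from the
# defining module shown in this diff.
from deepeval.integrations.pydantic_ai.patcher import instrument
from deepeval.metrics import AnswerRelevancyMetric
from pydantic_ai import Agent

instrument()  # safe to call twice: the _INSTRUMENTED flag makes it a no-op

agent = Agent(
    "openai:gpt-4o-mini",
    # consumed by the patched __init__, not passed through to pydantic-ai:
    agent_metrics=[AnswerRelevancyMetric()],
    llm_metric_collection="my-llm-metrics",
)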
@@ -64,101 +123,101 @@ def _patch_agent_tool_decorator():
     Agent.tool = wrapper
 
 
-def _create_patched_tool(
-    func: Callable,
-    metrics: Optional[List[BaseMetric]] = None,
-    metric_collection: Optional[str] = None,
-):
-    import asyncio
-
-    original_func = func
-
-    is_async = asyncio.iscoroutinefunction(original_func)
+################### Runtime Patches ###################
 
-    if is_async:
 
-        @functools.wraps(original_func)
-        async def async_wrapper(*args, **kwargs):
-            with Observer(
-                span_type="tool",
-                func_name=original_func.__name__,
-                metrics=metrics,
-                metric_collection=metric_collection,
-                function_kwargs={"args": args, **kwargs},
-            ) as observer:
-                result = await original_func(*args, **kwargs)
-                observer.result = result
-
-            return result
+def _patch_agent_run_sync(
+    agent: Agent,
+    agent_metric_collection: Optional[str] = None,
+    agent_metrics: Optional[List[BaseMetric]] = None,
+):
+    original_run_sync = agent.run_sync
 
-        return async_wrapper
-    else:
+    @functools.wraps(original_run_sync)
+    def wrapper(
+        *args,
+        metric_collection: Optional[str] = None,
+        metrics: Optional[List[BaseMetric]] = None,
+        name: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[dict] = None,
+        thread_id: Optional[str] = None,
+        user_id: Optional[str] = None,
+        **kwargs
+    ):
 
-        @functools.wraps(original_func)
-        def sync_wrapper(*args, **kwargs):
-            with Observer(
-                span_type="tool",
-                func_name=original_func.__name__,
-                metrics=metrics,
-                metric_collection=metric_collection,
-                function_kwargs={"args": args, **kwargs},
-            ) as observer:
-                result = original_func(*args, **kwargs)
-                observer.result = result
+        sig = inspect.signature(original_run_sync)
+        bound = sig.bind_partial(*args, **kwargs)
+        bound.apply_defaults()
+        input = bound.arguments.get("user_prompt", None)
 
-            return result
+        with Observer(
+            span_type="agent",
+            func_name="Agent",
+            function_kwargs={"input": input},
+            metrics=agent_metrics,
+            metric_collection=agent_metric_collection,
+        ) as observer:
 
-        return sync_wrapper
+            token = _IN_RUN_SYNC.set(True)
+            try:
+                result = original_run_sync(*args, **kwargs)
+            finally:
+                _IN_RUN_SYNC.reset(token)
 
+            observer.update_span_properties = (
+                lambda agent_span: set_agent_span_attributes(agent_span, result)
+            )
+            observer.result = result.output
 
-def _patch_agent_init():
-    original_init = Agent.__init__
+            _update_trace_context(
+                trace_name=name,
+                trace_tags=tags,
+                trace_metadata=metadata,
+                trace_thread_id=thread_id,
+                trace_user_id=user_id,
+                trace_metric_collection=metric_collection,
+                trace_metrics=metrics,
+                trace_input=input,
+                trace_output=result.output,
+            )
 
-    @functools.wraps(original_init)
-    def wrapper(
-        self,
-        *args,
-        llm_metric_collection: Optional[str] = None,
-        llm_metrics: Optional[List[BaseMetric]] = None,
-        llm_prompt: Optional[Prompt] = None,
-        agent_metric_collection: Optional[str] = None,
-        agent_metrics: Optional[List[BaseMetric]] = None,
-        **kwargs
-    ):
-        result = original_init(self, *args, **kwargs)
-        _patch_llm_model(
-            self._model, llm_metric_collection, llm_metrics, llm_prompt
-        )  # runtime patch of the model
-        _patch_agent_run(agent_metric_collection, agent_metrics)
         return result
 
-    Agent.__init__ = wrapper
+    agent.run_sync = wrapper
 
 
 def _patch_agent_run(
+    agent: Agent,
     agent_metric_collection: Optional[str] = None,
     agent_metrics: Optional[List[BaseMetric]] = None,
 ):
-    original_run = Agent.run
+    original_run = agent.run
 
     @functools.wraps(original_run)
     async def wrapper(
         *args,
-        trace_metric_collection: Optional[str] = None,
-        trace_metrics: Optional[List[BaseMetric]] = None,
-        trace_name: Optional[str] = None,
-        trace_tags: Optional[List[str]] = None,
-        trace_metadata: Optional[dict] = None,
-        trace_thread_id: Optional[str] = None,
-        trace_user_id: Optional[str] = None,
+        metric_collection: Optional[str] = None,
+        metrics: Optional[List[BaseMetric]] = None,
+        name: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        metadata: Optional[dict] = None,
+        thread_id: Optional[str] = None,
+        user_id: Optional[str] = None,
        **kwargs
     ):
+        sig = inspect.signature(original_run)
+        bound = sig.bind_partial(*args, **kwargs)
+        bound.apply_defaults()
+        input = bound.arguments.get("user_prompt", None)
+
+        in_sync = _IN_RUN_SYNC.get()
        with Observer(
-            span_type="agent",
-            func_name="Agent",
-            function_kwargs={"input": args[1]},
-            metrics=agent_metrics,
-            metric_collection=agent_metric_collection,
+            span_type="agent" if not in_sync else "custom",
+            func_name="Agent" if not in_sync else "run",
+            function_kwargs={"input": input},
+            metrics=agent_metrics if not in_sync else None,
+            metric_collection=agent_metric_collection if not in_sync else None,
         ) as observer:
             result = await original_run(*args, **kwargs)
             observer.update_span_properties = (
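The `_IN_RUN_SYNC` `ContextVar` exists because pydantic-ai's `run_sync` drives `run` internally: without a guard, one user call would be recorded as two nested agent spans. The sync wrapper sets the flag around the inner call, and the async wrapper demotes itself to a `"custom"` span with no metrics when it sees the flag. A minimal self-contained sketch of this re-entrancy pattern (names hypothetical, not deepeval APIs):

from contextvars import ContextVar

# Hypothetical stand-alone illustration of the guard used above.
_IN_OUTER = ContextVar("in_outer", default=False)

def outer():
    token = _IN_OUTER.set(True)   # mark: we are inside the outer wrapper
    try:
        return inner()            # inner() can detect the outer frame
    finally:
        _IN_OUTER.reset(token)    # always restore the previous value

def inner():
    # Demoted behavior when called via outer(), full behavior otherwise.
    return "nested" if _IN_OUTER.get() else "top-level"

print(inner())  # -> "top-level"
print(outer())  # -> "nested"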
@@ -167,44 +226,20 @@ def _patch_agent_run(
             observer.result = result.output
 
             _update_trace_context(
-                trace_name=trace_name,
-                trace_tags=trace_tags,
-                trace_metadata=trace_metadata,
-                trace_thread_id=trace_thread_id,
-                trace_user_id=trace_user_id,
-                trace_metric_collection=trace_metric_collection,
-                trace_metrics=trace_metrics,
-                trace_input=args[1],
+                trace_name=name,
+                trace_tags=tags,
+                trace_metadata=metadata,
+                trace_thread_id=thread_id,
+                trace_user_id=user_id,
+                trace_metric_collection=metric_collection,
+                trace_metrics=metrics,
+                trace_input=input,
                 trace_output=result.output,
             )
 
         return result
 
-    Agent.run = wrapper
-
-
-def _update_trace_context(
-    trace_name: Optional[str] = None,
-    trace_tags: Optional[List[str]] = None,
-    trace_metadata: Optional[dict] = None,
-    trace_thread_id: Optional[str] = None,
-    trace_user_id: Optional[str] = None,
-    trace_metric_collection: Optional[str] = None,
-    trace_metrics: Optional[List[BaseMetric]] = None,
-    trace_input: Optional[Any] = None,
-    trace_output: Optional[Any] = None,
-):
-
-    current_trace = current_trace_context.get()
-    current_trace.name = trace_name
-    current_trace.tags = trace_tags
-    current_trace.metadata = trace_metadata
-    current_trace.thread_id = trace_thread_id
-    current_trace.user_id = trace_user_id
-    current_trace.metric_collection = trace_metric_collection
-    current_trace.metrics = trace_metrics
-    current_trace.input = trace_input
-    current_trace.output = trace_output
+    agent.run = wrapper
 
 
 def _patch_llm_model(
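Both run wrappers (and the LLM wrapper below) now recover arguments by binding them against the wrapped function's signature, where 3.5.1 indexed `args[1]` or fell back from `kwargs` and broke when the value arrived by keyword. A small sketch of the technique; `run` here is a hypothetical stand-in for the wrapped method:

import inspect

# Hypothetical target mirroring how a run method accepts user_prompt.
def run(self, user_prompt=None, *, model=None):
    ...

sig = inspect.signature(run)

def extract_user_prompt(*args, **kwargs):
    # Works whether user_prompt arrives positionally or as a keyword.
    bound = sig.bind_partial(*args, **kwargs)
    bound.apply_defaults()
    return bound.arguments.get("user_prompt")

print(extract_user_prompt("agent", "What's the weather?"))   # positional
print(extract_user_prompt("agent", user_prompt="Hi there"))  # keyword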
@@ -214,6 +249,8 @@ def _patch_llm_model(
     llm_prompt: Optional[Prompt] = None,
 ):
     original_func = model.request
+    sig = inspect.signature(original_func)
+
     try:
         model_name = model.model_name
     except Exception:
@@ -221,6 +258,10 @@ def _patch_llm_model(
 
     @functools.wraps(original_func)
     async def wrapper(*args, **kwargs):
+        bound = sig.bind_partial(*args, **kwargs)
+        bound.apply_defaults()
+        request = bound.arguments.get("messages", [])
+
         with Observer(
             span_type="llm",
             func_name="LLM",
@@ -229,36 +270,93 @@ def _patch_llm_model(
             metric_collection=llm_metric_collection,
         ) as observer:
             result = await original_func(*args, **kwargs)
-            request = kwargs.get("messages", [])
-            if not request:
-                request = args[0]
             observer.update_span_properties = (
                 lambda llm_span: set_llm_span_attributes(
-                    llm_span, args[0], result, llm_prompt
+                    llm_span, request, result, llm_prompt
                 )
             )
             observer.result = result
-            return result
+        return result
 
     model.request = wrapper
 
 
-def instrument(otel: Optional[bool] = False, api_key: Optional[str] = None):
+################### Helper Functions ###################
 
-    if api_key:
-        deepeval.login(api_key)
 
-    api_key = get_confident_api_key()
+def _create_patched_tool(
+    func: Callable,
+    metrics: Optional[List[BaseMetric]] = None,
+    metric_collection: Optional[str] = None,
+):
+    import asyncio
 
-    if not api_key:
-        raise ValueError("No api key provided.")
+    original_func = func
 
-    if otel:
-        instrument_pydantic_ai(api_key)
+    is_async = asyncio.iscoroutinefunction(original_func)
+
+    if is_async:
+
+        @functools.wraps(original_func)
+        async def async_wrapper(*args, **kwargs):
+            sanitized_args = sanitize_run_context(args)
+            sanitized_kwargs = sanitize_run_context(kwargs)
+            with Observer(
+                span_type="tool",
+                func_name=original_func.__name__,
+                metrics=metrics,
+                metric_collection=metric_collection,
+                function_kwargs={"args": sanitized_args, **sanitized_kwargs},
+            ) as observer:
+                result = await original_func(*args, **kwargs)
+                observer.result = result
+
+            return result
+
+        return async_wrapper
     else:
-        with capture_tracing_integration("pydantic_ai"):
-            _patch_agent_init()
-            _patch_agent_tool_decorator()
+
+        @functools.wraps(original_func)
+        def sync_wrapper(*args, **kwargs):
+            sanitized_args = sanitize_run_context(args)
+            sanitized_kwargs = sanitize_run_context(kwargs)
+            with Observer(
+                span_type="tool",
+                func_name=original_func.__name__,
+                metrics=metrics,
+                metric_collection=metric_collection,
+                function_kwargs={"args": sanitized_args, **sanitized_kwargs},
+            ) as observer:
+                result = original_func(*args, **kwargs)
+                observer.result = result
+
+            return result
+
+        return sync_wrapper
+
+
+def _update_trace_context(
+    trace_name: Optional[str] = None,
+    trace_tags: Optional[List[str]] = None,
+    trace_metadata: Optional[dict] = None,
+    trace_thread_id: Optional[str] = None,
+    trace_user_id: Optional[str] = None,
+    trace_metric_collection: Optional[str] = None,
+    trace_metrics: Optional[List[BaseMetric]] = None,
+    trace_input: Optional[Any] = None,
+    trace_output: Optional[Any] = None,
+):
+
+    current_trace = current_trace_context.get()
+    current_trace.name = trace_name
+    current_trace.tags = trace_tags
+    current_trace.metadata = trace_metadata
+    current_trace.thread_id = trace_thread_id
+    current_trace.user_id = trace_user_id
+    current_trace.metric_collection = trace_metric_collection
+    current_trace.metrics = trace_metrics
+    current_trace.input = trace_input
+    current_trace.output = trace_output
 
 
 def set_llm_span_attributes(
@@ -306,71 +404,8 @@ def set_llm_span_attributes(
     llm_span.output = LlmOutput(
         role="Assistant", content=content, tool_calls=tool_calls
     )
-    llm_span.tools_called = _extract_tools_called_from_llm_response(
-        result.parts
-    )
+    llm_span.tools_called = extract_tools_called_from_llm_response(result.parts)
 
 
 def set_agent_span_attributes(agent_span: AgentSpan, result: AgentRunResult):
-    agent_span.tools_called = _extract_tools_called(result)
-
-
-# llm tools called
-def _extract_tools_called_from_llm_response(
-    result: List[ModelResponsePart],
-) -> List[ToolCall]:
-    tool_calls = []
-
-    # Loop through each ModelResponsePart
-    for part in result:
-        # Look for parts with part_kind="tool-call"
-        if hasattr(part, "part_kind") and part.part_kind == "tool-call":
-            # Extract tool name and args from the ToolCallPart
-            tool_name = part.tool_name
-            input_parameters = (
-                part.args_as_dict() if hasattr(part, "args_as_dict") else None
-            )
-
-            # Create and append ToolCall object
-            tool_call = ToolCall(
-                name=tool_name, input_parameters=input_parameters
-            )
-            tool_calls.append(tool_call)
-
-    return tool_calls
-
-
-# TODO: llm tools called (response is present in the next message)
-def _extract_tools_called(result: AgentRunResult) -> List[ToolCall]:
-    tool_calls = []
-
-    # Access the message history from the _state
-    message_history = result._state.message_history
-
-    # Scan through all messages in the history
-    for message in message_history:
-        # Check if this is a ModelResponse (kind="response")
-        if hasattr(message, "kind") and message.kind == "response":
-            # For ModelResponse messages, check each part
-            if hasattr(message, "parts"):
-                for part in message.parts:
-                    # Look for parts with part_kind="tool-call"
-                    if (
-                        hasattr(part, "part_kind")
-                        and part.part_kind == "tool-call"
-                    ):
-                        # Extract tool name and args from the ToolCallPart
-                        tool_name = part.tool_name
-                        input_parameters = (
-                            part.args_as_dict()
-                            if hasattr(part, "args_as_dict")
-                            else None
-                        )
-
-                        # Create and append ToolCall object
-                        tool_call = ToolCall(
-                            name=tool_name, input_parameters=input_parameters
-                        )
-                        tool_calls.append(tool_call)
-
-    return tool_calls
+    agent_span.tools_called = extract_tools_called(result)
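Both extraction helpers (relocated into the new `utils.py` shown below) probe `part_kind`, `tool_name`, and `args_as_dict()` only through `hasattr`, so they can be exercised without a live model. A sketch with a hypothetical stub in place of a real pydantic-ai `ToolCallPart`, assuming deepeval 3.5.3 and pydantic-ai are installed:

from deepeval.integrations.pydantic_ai.utils import (
    extract_tools_called_from_llm_response,
)

# Hypothetical stub standing in for a pydantic-ai ToolCallPart; the helper
# only duck-types part_kind / tool_name / args_as_dict, so this is enough.
class FakeToolCallPart:
    part_kind = "tool-call"
    tool_name = "get_weather"

    def args_as_dict(self):
        return {"city": "Paris"}

tool_calls = extract_tools_called_from_llm_response([FakeToolCallPart()])
print(tool_calls[0].name)              # -> "get_weather"
print(tool_calls[0].input_parameters)  # -> {"city": "Paris"}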
deepeval/integrations/pydantic_ai/utils.py (new file)
@@ -0,0 +1,86 @@
+from typing import List
+from pydantic_ai.messages import ModelResponsePart
+from pydantic_ai.agent import AgentRunResult
+from pydantic_ai._run_context import RunContext
+from deepeval.test_case.llm_test_case import ToolCall
+
+
+# llm tools called
+def extract_tools_called_from_llm_response(
+    result: List[ModelResponsePart],
+) -> List[ToolCall]:
+    tool_calls = []
+
+    # Loop through each ModelResponsePart
+    for part in result:
+        # Look for parts with part_kind="tool-call"
+        if hasattr(part, "part_kind") and part.part_kind == "tool-call":
+            # Extract tool name and args from the ToolCallPart
+            tool_name = part.tool_name
+            input_parameters = (
+                part.args_as_dict() if hasattr(part, "args_as_dict") else None
+            )
+
+            # Create and append ToolCall object
+            tool_call = ToolCall(
+                name=tool_name, input_parameters=input_parameters
+            )
+            tool_calls.append(tool_call)
+
+    return tool_calls
+
+
+# TODO: llm tools called (response is present in the next message)
+def extract_tools_called(result: AgentRunResult) -> List[ToolCall]:
+    tool_calls = []
+
+    # Access the message history from the _state
+    message_history = result._state.message_history
+
+    # Scan through all messages in the history
+    for message in message_history:
+        # Check if this is a ModelResponse (kind="response")
+        if hasattr(message, "kind") and message.kind == "response":
+            # For ModelResponse messages, check each part
+            if hasattr(message, "parts"):
+                for part in message.parts:
+                    # Look for parts with part_kind="tool-call"
+                    if (
+                        hasattr(part, "part_kind")
+                        and part.part_kind == "tool-call"
+                    ):
+                        # Extract tool name and args from the ToolCallPart
+                        tool_name = part.tool_name
+                        input_parameters = (
+                            part.args_as_dict()
+                            if hasattr(part, "args_as_dict")
+                            else None
+                        )
+
+                        # Create and append ToolCall object
+                        tool_call = ToolCall(
+                            name=tool_name, input_parameters=input_parameters
+                        )
+                        tool_calls.append(tool_call)
+
+    return tool_calls
+
+
+def sanitize_run_context(value):
+    """
+    Recursively replace pydantic-ai RunContext instances with '<RunContext>'.
+
+    This avoids leaking internal context details into recorded function kwargs,
+    while keeping the original arguments intact for the actual function call.
+    """
+    if isinstance(value, RunContext):
+        return "<RunContext>"
+    if isinstance(value, dict):
+        return {k: sanitize_run_context(v) for k, v in value.items()}
+    if isinstance(value, (list, tuple)):
+        sanitized = [sanitize_run_context(v) for v in value]
+        return tuple(sanitized) if isinstance(value, tuple) else sanitized
+    if isinstance(value, set):
+        return {sanitize_run_context(v) for v in value}
+
+    return value
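As wired into `_create_patched_tool` above, the sanitizer runs only on the copy of the arguments recorded on the span; the tool itself still receives the originals. A short usage sketch, assuming pydantic-ai is installed (the module imports it at load time):

from deepeval.integrations.pydantic_ai.utils import sanitize_run_context

# Containers are rebuilt recursively; anything that isn't a RunContext
# (or a container holding one) passes through unchanged.
args = ({"query": "weather in Paris", "retries": [1, 2, 3]}, ("a", "b"))
print(sanitize_run_context(args))
# -> ({'query': 'weather in Paris', 'retries': [1, 2, 3]}, ('a', 'b'))

# A RunContext anywhere in the structure would be replaced by the string
# "<RunContext>" in the recorded copy, while the wrapped tool still
# receives the original, untouched arguments.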
deepeval/metrics/conversational_g_eval/conversational_g_eval.py
@@ -316,6 +316,7 @@ class ConversationalGEval(BaseConversationalMetric):
         else:
             prompt = ConversationalGEvalTemplate.generate_evaluation_results(
                 evaluation_steps=self.number_evaluation_steps(),
+                test_case_content=test_case_content,
                 turns=[
                     convert_turn_to_dict(turn, self.evaluation_params)
                     for turn in test_case.turns
deepeval/metrics/pii_leakage/pii_leakage.py
@@ -284,7 +284,7 @@ class PIILeakageMetric(BaseMetric):
                 no_privacy_count += 1
 
         score = no_privacy_count / number_of_verdicts
-        return 1 if self.strict_mode and score < 1 else score
+        return 0 if self.strict_mode and score < self.threshold else score
 
     def is_successful(self) -> bool:
         if self.error is not None:
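This one-line change fixes an inverted strict-mode branch: 3.5.1 returned a perfect 1 precisely when strict mode was on and the score fell below 1, passing the cases it should fail, while 3.5.3 collapses any score below `self.threshold` to 0. A standalone arithmetic sketch of the new behavior (not the metric class itself):

def strict_score(no_privacy_count: int, number_of_verdicts: int,
                 strict_mode: bool = True, threshold: float = 1.0) -> float:
    # Mirrors the 3.5.3 logic: binary pass/fail under strict mode.
    score = no_privacy_count / number_of_verdicts
    return 0 if strict_mode and score < threshold else score

print(strict_score(8, 10))                     # 0   -- 0.8 < 1.0, strict mode fails it
print(strict_score(10, 10))                    # 1.0 -- all verdicts clean
print(strict_score(8, 10, strict_mode=False))  # 0.8 -- raw score without strict mode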