deepeval 3.6.7__py3-none-any.whl → 3.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. deepeval/_version.py +1 -1
  2. deepeval/errors.py +20 -2
  3. deepeval/evaluate/execute.py +725 -217
  4. deepeval/evaluate/types.py +1 -0
  5. deepeval/evaluate/utils.py +13 -3
  6. deepeval/integrations/crewai/__init__.py +2 -1
  7. deepeval/integrations/crewai/tool.py +71 -0
  8. deepeval/integrations/llama_index/__init__.py +0 -4
  9. deepeval/integrations/llama_index/handler.py +20 -21
  10. deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
  11. deepeval/metrics/__init__.py +13 -0
  12. deepeval/metrics/base_metric.py +1 -0
  13. deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
  14. deepeval/metrics/conversational_g_eval/__init__.py +3 -0
  15. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
  16. deepeval/metrics/dag/schema.py +1 -1
  17. deepeval/metrics/dag/templates.py +2 -2
  18. deepeval/metrics/goal_accuracy/__init__.py +1 -0
  19. deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
  20. deepeval/metrics/goal_accuracy/schema.py +17 -0
  21. deepeval/metrics/goal_accuracy/template.py +235 -0
  22. deepeval/metrics/hallucination/hallucination.py +8 -8
  23. deepeval/metrics/mcp/mcp_task_completion.py +7 -2
  24. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
  25. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
  26. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
  27. deepeval/metrics/plan_adherence/__init__.py +1 -0
  28. deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
  29. deepeval/metrics/plan_adherence/schema.py +11 -0
  30. deepeval/metrics/plan_adherence/template.py +170 -0
  31. deepeval/metrics/plan_quality/__init__.py +1 -0
  32. deepeval/metrics/plan_quality/plan_quality.py +292 -0
  33. deepeval/metrics/plan_quality/schema.py +11 -0
  34. deepeval/metrics/plan_quality/template.py +101 -0
  35. deepeval/metrics/step_efficiency/__init__.py +1 -0
  36. deepeval/metrics/step_efficiency/schema.py +11 -0
  37. deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
  38. deepeval/metrics/step_efficiency/template.py +256 -0
  39. deepeval/metrics/task_completion/task_completion.py +1 -0
  40. deepeval/metrics/tool_correctness/schema.py +6 -0
  41. deepeval/metrics/tool_correctness/template.py +88 -0
  42. deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
  43. deepeval/metrics/tool_use/__init__.py +1 -0
  44. deepeval/metrics/tool_use/schema.py +19 -0
  45. deepeval/metrics/tool_use/template.py +220 -0
  46. deepeval/metrics/tool_use/tool_use.py +458 -0
  47. deepeval/metrics/topic_adherence/__init__.py +1 -0
  48. deepeval/metrics/topic_adherence/schema.py +16 -0
  49. deepeval/metrics/topic_adherence/template.py +162 -0
  50. deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
  51. deepeval/models/embedding_models/azure_embedding_model.py +37 -36
  52. deepeval/models/embedding_models/local_embedding_model.py +30 -32
  53. deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
  54. deepeval/models/embedding_models/openai_embedding_model.py +22 -31
  55. deepeval/openai/extractors.py +61 -16
  56. deepeval/openai/patch.py +8 -12
  57. deepeval/openai/types.py +1 -1
  58. deepeval/openai/utils.py +108 -1
  59. deepeval/prompt/prompt.py +1 -0
  60. deepeval/prompt/utils.py +43 -14
  61. deepeval/synthesizer/synthesizer.py +11 -10
  62. deepeval/test_case/llm_test_case.py +6 -2
  63. deepeval/test_run/test_run.py +190 -207
  64. deepeval/tracing/__init__.py +2 -1
  65. deepeval/tracing/otel/exporter.py +3 -4
  66. deepeval/tracing/otel/utils.py +23 -4
  67. deepeval/tracing/trace_context.py +53 -38
  68. deepeval/tracing/tracing.py +23 -0
  69. deepeval/tracing/types.py +16 -14
  70. deepeval/utils.py +21 -0
  71. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/METADATA +1 -1
  72. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/RECORD +75 -53
  73. deepeval/integrations/llama_index/agent/patched.py +0 -68
  74. deepeval/tracing/message_types/__init__.py +0 -10
  75. deepeval/tracing/message_types/base.py +0 -6
  76. deepeval/tracing/message_types/messages.py +0 -14
  77. deepeval/tracing/message_types/tools.py +0 -18
  78. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/LICENSE.md +0 -0
  79. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/WHEEL +0 -0
  80. {deepeval-3.6.7.dist-info → deepeval-3.6.8.dist-info}/entry_points.txt +0 -0
deepeval/prompt/utils.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import re
2
2
  import uuid
3
3
  from jinja2 import Template
4
- from typing import Any, Dict, Type, Optional, List
4
+ from typing import Any, Dict, Type, Optional, List, Match
5
5
  from pydantic import BaseModel, create_model
6
6
 
7
7
  from deepeval.prompt.api import (
@@ -16,36 +16,65 @@ from deepeval.prompt.api import (
16
16
  ###################################
17
17
 
18
18
 
19
- def interpolate_mustache(text: str, **kwargs) -> str:
19
+ def interpolate_mustache(text: str, **kwargs: Any) -> str:
20
20
  """Interpolate using Mustache format: {{variable}}"""
21
- formatted_template = re.sub(r"\{\{(\w+)\}\}", r"{\1}", text)
22
- return formatted_template.format(**kwargs)
23
21
 
22
+ def replace_match(match: Match[str]) -> str:
23
+ var_name = match.group(1)
24
+ if var_name in kwargs:
25
+ return str(kwargs[var_name])
26
+ # Raise error for missing variables to maintain consistency
27
+ raise KeyError(f"Missing variable in template: {var_name}")
24
28
 
25
- def interpolate_mustache_with_space(text: str, **kwargs) -> str:
29
+ return re.sub(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}", replace_match, text)
30
+
31
+
32
+ def interpolate_mustache_with_space(text: str, **kwargs: Any) -> str:
26
33
  """Interpolate using Mustache with space format: {{ variable }}"""
27
- formatted_template = re.sub(r"\{\{ (\w+) \}\}", r"{\1}", text)
28
- return formatted_template.format(**kwargs)
34
+
35
+ def replace_match(match: Match[str]) -> str:
36
+ var_name = match.group(1)
37
+ if var_name in kwargs:
38
+ return str(kwargs[var_name])
39
+ # Raise error for missing variables to maintain consistency
40
+ raise KeyError(f"Missing variable in template: {var_name}")
41
+
42
+ return re.sub(r"\{\{ ([a-zA-Z_][a-zA-Z0-9_]*) \}\}", replace_match, text)
29
43
 
30
44
 
31
- def interpolate_fstring(text: str, **kwargs) -> str:
45
+ def interpolate_fstring(text: str, **kwargs: Any) -> str:
32
46
  """Interpolate using F-string format: {variable}"""
33
- return text.format(**kwargs)
34
47
 
48
+ def replace_match(match: Match[str]) -> str:
49
+ var_name = match.group(1)
50
+ if var_name in kwargs:
51
+ return str(kwargs[var_name])
52
+ # Raise error for missing variables to maintain consistency
53
+ raise KeyError(f"Missing variable in template: {var_name}")
35
54
 
36
- def interpolate_dollar_brackets(text: str, **kwargs) -> str:
55
+ return re.sub(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
56
+
57
+
58
+ def interpolate_dollar_brackets(text: str, **kwargs: Any) -> str:
37
59
  """Interpolate using Dollar Brackets format: ${variable}"""
38
- formatted_template = re.sub(r"\$\{(\w+)\}", r"{\1}", text)
39
- return formatted_template.format(**kwargs)
60
+
61
+ def replace_match(match: Match[str]) -> str:
62
+ var_name = match.group(1)
63
+ if var_name in kwargs:
64
+ return str(kwargs[var_name])
65
+ # Raise error for missing variables to maintain consistency
66
+ raise KeyError(f"Missing variable in template: {var_name}")
67
+
68
+ return re.sub(r"\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}", replace_match, text)
40
69
 
41
70
 
42
- def interpolate_jinja(text: str, **kwargs) -> str:
71
+ def interpolate_jinja(text: str, **kwargs: Any) -> str:
43
72
  template = Template(text)
44
73
  return template.render(**kwargs)
45
74
 
46
75
 
47
76
  def interpolate_text(
48
- interpolation_type: PromptInterpolationType, text: str, **kwargs
77
+ interpolation_type: PromptInterpolationType, text: str, **kwargs: Any
49
78
  ) -> str:
50
79
  """Apply the appropriate interpolation method based on the type"""
51
80
  if interpolation_type == PromptInterpolationType.MUSTACHE:
deepeval/synthesizer/synthesizer.py CHANGED
@@ -555,7 +555,7 @@ class Synthesizer:
555
555
  include_expected_output=include_expected_output,
556
556
  max_goldens_per_context=max_goldens_per_context,
557
557
  source_files=source_files,
558
- index=index,
558
+ context_index=index,
559
559
  progress=progress,
560
560
  pbar_id=pbar_id,
561
561
  context_scores=_context_scores,
@@ -577,7 +577,7 @@ class Synthesizer:
577
577
  include_expected_output: bool,
578
578
  max_goldens_per_context: int,
579
579
  source_files: Optional[List[str]],
580
- index: int,
580
+ context_index: int,
581
581
  progress: Optional[Progress] = None,
582
582
  pbar_id: Optional[int] = None,
583
583
  context_scores: Optional[List[float]] = None,
@@ -599,7 +599,7 @@ class Synthesizer:
599
599
  # Add pbars
600
600
  pbar_generate_goldens_id = add_pbar(
601
601
  progress,
602
- f"\t⚡ Generating goldens from context #{index}",
602
+ f"\t⚡ Generating goldens from context #{context_index}",
603
603
  total=1 + max_goldens_per_context,
604
604
  )
605
605
  pbar_generate_inputs_id = add_pbar(
@@ -643,7 +643,7 @@ class Synthesizer:
643
643
 
644
644
  # Helper function to process each input in parallel
645
645
  async def process_input(
646
- index: int,
646
+ input_index: int,
647
647
  data: SyntheticData,
648
648
  progress: Optional[Progress] = None,
649
649
  ):
@@ -654,7 +654,7 @@ class Synthesizer:
654
654
  num_evolutions=self.evolution_config.num_evolutions,
655
655
  evolutions=self.evolution_config.evolutions,
656
656
  progress=progress,
657
- pbar_evolve_input_id=pbar_evolve_input_ids[index],
657
+ pbar_evolve_input_id=pbar_evolve_input_ids[input_index],
658
658
  remove_pbar=False,
659
659
  )
660
660
 
@@ -672,7 +672,7 @@ class Synthesizer:
672
672
  )
673
673
  evolved_input = res.input
674
674
  update_pbar(
675
- progress, pbar_evolve_input_ids[index], remove=False
675
+ progress, pbar_evolve_input_ids[input_index], remove=False
676
676
  )
677
677
 
678
678
  # Generate expected output
@@ -685,7 +685,7 @@ class Synthesizer:
685
685
  )
686
686
  expected_output = await self._a_generate(expected_output_prompt)
687
687
  update_pbar(
688
- progress, pbar_evolve_input_ids[index], remove=False
688
+ progress, pbar_evolve_input_ids[input_index], remove=False
689
689
  )
690
690
 
691
691
  # Create Golden
@@ -694,13 +694,14 @@ class Synthesizer:
694
694
  context=context,
695
695
  expected_output=expected_output,
696
696
  source_file=(
697
- source_files[index]
698
- if source_files is not None and index < len(source_files)
697
+ source_files[context_index]
698
+ if source_files is not None
699
+ and context_index < len(source_files)
699
700
  else None
700
701
  ),
701
702
  additional_metadata={
702
703
  "evolutions": evolutions_used,
703
- "synthetic_input_quality": scores[index],
704
+ "synthetic_input_quality": scores[input_index],
704
705
  # "context_quality": (
705
706
  # context_scores[data_index]
706
707
  # if context_scores is not None
deepeval/test_case/llm_test_case.py CHANGED
@@ -122,7 +122,9 @@ class ToolCall(BaseModel):
122
122
 
123
123
  # Handle nested fields like input_parameters
124
124
  if self.input_parameters:
125
- formatted_input = json.dumps(self.input_parameters, indent=4)
125
+ formatted_input = json.dumps(
126
+ self.input_parameters, indent=4, ensure_ascii=False
127
+ )
126
128
  formatted_input = self._indent_nested_field(
127
129
  "input_parameters", formatted_input
128
130
  )
@@ -130,7 +132,9 @@ class ToolCall(BaseModel):
130
132
 
131
133
  # Handle nested fields like output
132
134
  if isinstance(self.output, dict):
133
- formatted_output = json.dumps(self.output, indent=4)
135
+ formatted_output = json.dumps(
136
+ self.output, indent=4, ensure_ascii=False
137
+ )
134
138
  formatted_output = self._indent_nested_field(
135
139
  "output", formatted_output
136
140
  )