deepeval 3.6.7__py3-none-any.whl → 3.6.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. deepeval/_version.py +1 -1
  2. deepeval/config/settings.py +104 -36
  3. deepeval/config/utils.py +5 -0
  4. deepeval/dataset/dataset.py +162 -30
  5. deepeval/dataset/utils.py +41 -13
  6. deepeval/errors.py +20 -2
  7. deepeval/evaluate/execute.py +1662 -688
  8. deepeval/evaluate/types.py +1 -0
  9. deepeval/evaluate/utils.py +13 -3
  10. deepeval/integrations/crewai/__init__.py +2 -1
  11. deepeval/integrations/crewai/tool.py +71 -0
  12. deepeval/integrations/llama_index/__init__.py +0 -4
  13. deepeval/integrations/llama_index/handler.py +20 -21
  14. deepeval/integrations/pydantic_ai/instrumentator.py +125 -76
  15. deepeval/metrics/__init__.py +13 -0
  16. deepeval/metrics/base_metric.py +1 -0
  17. deepeval/metrics/contextual_precision/contextual_precision.py +27 -21
  18. deepeval/metrics/conversational_g_eval/__init__.py +3 -0
  19. deepeval/metrics/conversational_g_eval/conversational_g_eval.py +11 -7
  20. deepeval/metrics/dag/schema.py +1 -1
  21. deepeval/metrics/dag/templates.py +2 -2
  22. deepeval/metrics/goal_accuracy/__init__.py +1 -0
  23. deepeval/metrics/goal_accuracy/goal_accuracy.py +349 -0
  24. deepeval/metrics/goal_accuracy/schema.py +17 -0
  25. deepeval/metrics/goal_accuracy/template.py +235 -0
  26. deepeval/metrics/hallucination/hallucination.py +8 -8
  27. deepeval/metrics/indicator.py +21 -1
  28. deepeval/metrics/mcp/mcp_task_completion.py +7 -2
  29. deepeval/metrics/mcp/multi_turn_mcp_use_metric.py +16 -6
  30. deepeval/metrics/mcp_use_metric/mcp_use_metric.py +2 -1
  31. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/multimodal_contextual_precision.py +32 -24
  32. deepeval/metrics/plan_adherence/__init__.py +1 -0
  33. deepeval/metrics/plan_adherence/plan_adherence.py +292 -0
  34. deepeval/metrics/plan_adherence/schema.py +11 -0
  35. deepeval/metrics/plan_adherence/template.py +170 -0
  36. deepeval/metrics/plan_quality/__init__.py +1 -0
  37. deepeval/metrics/plan_quality/plan_quality.py +292 -0
  38. deepeval/metrics/plan_quality/schema.py +11 -0
  39. deepeval/metrics/plan_quality/template.py +101 -0
  40. deepeval/metrics/step_efficiency/__init__.py +1 -0
  41. deepeval/metrics/step_efficiency/schema.py +11 -0
  42. deepeval/metrics/step_efficiency/step_efficiency.py +234 -0
  43. deepeval/metrics/step_efficiency/template.py +256 -0
  44. deepeval/metrics/task_completion/task_completion.py +1 -0
  45. deepeval/metrics/tool_correctness/schema.py +6 -0
  46. deepeval/metrics/tool_correctness/template.py +88 -0
  47. deepeval/metrics/tool_correctness/tool_correctness.py +226 -22
  48. deepeval/metrics/tool_use/__init__.py +1 -0
  49. deepeval/metrics/tool_use/schema.py +19 -0
  50. deepeval/metrics/tool_use/template.py +220 -0
  51. deepeval/metrics/tool_use/tool_use.py +458 -0
  52. deepeval/metrics/topic_adherence/__init__.py +1 -0
  53. deepeval/metrics/topic_adherence/schema.py +16 -0
  54. deepeval/metrics/topic_adherence/template.py +162 -0
  55. deepeval/metrics/topic_adherence/topic_adherence.py +355 -0
  56. deepeval/models/embedding_models/azure_embedding_model.py +37 -36
  57. deepeval/models/embedding_models/local_embedding_model.py +30 -32
  58. deepeval/models/embedding_models/ollama_embedding_model.py +18 -20
  59. deepeval/models/embedding_models/openai_embedding_model.py +22 -31
  60. deepeval/models/llms/amazon_bedrock_model.py +20 -17
  61. deepeval/models/llms/openai_model.py +10 -1
  62. deepeval/models/retry_policy.py +103 -20
  63. deepeval/openai/extractors.py +61 -16
  64. deepeval/openai/patch.py +8 -12
  65. deepeval/openai/types.py +1 -1
  66. deepeval/openai/utils.py +108 -1
  67. deepeval/prompt/prompt.py +1 -0
  68. deepeval/prompt/utils.py +43 -14
  69. deepeval/simulator/conversation_simulator.py +25 -18
  70. deepeval/synthesizer/chunking/context_generator.py +9 -1
  71. deepeval/synthesizer/synthesizer.py +11 -10
  72. deepeval/test_case/llm_test_case.py +6 -2
  73. deepeval/test_run/test_run.py +190 -207
  74. deepeval/tracing/__init__.py +2 -1
  75. deepeval/tracing/otel/exporter.py +3 -4
  76. deepeval/tracing/otel/utils.py +23 -4
  77. deepeval/tracing/trace_context.py +53 -38
  78. deepeval/tracing/tracing.py +23 -0
  79. deepeval/tracing/types.py +16 -14
  80. deepeval/utils.py +21 -0
  81. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/METADATA +1 -1
  82. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/RECORD +85 -63
  83. deepeval/integrations/llama_index/agent/patched.py +0 -68
  84. deepeval/tracing/message_types/__init__.py +0 -10
  85. deepeval/tracing/message_types/base.py +0 -6
  86. deepeval/tracing/message_types/messages.py +0 -14
  87. deepeval/tracing/message_types/tools.py +0 -18
  88. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/LICENSE.md +0 -0
  89. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/WHEEL +0 -0
  90. {deepeval-3.6.7.dist-info → deepeval-3.6.9.dist-info}/entry_points.txt +0 -0
@@ -1,68 +0,0 @@
1
- from typing import TypeVar, cast, Optional, List
2
- from pydantic import Field
3
- from deepeval.metrics import BaseMetric
4
- from deepeval.telemetry import capture_tracing_integration
5
-
6
- try:
7
- from llama_index.core.agent.workflow import (
8
- FunctionAgent,
9
- ReActAgent,
10
- CodeActAgent,
11
- )
12
-
13
- is_llama_index_installed = True
14
- except:
15
- is_llama_index_installed = False
16
-
17
-
18
- def is_llama_index_agent_installed():
19
- if not is_llama_index_installed:
20
- raise ImportError(
21
- "llama-index is neccesary for this functionality. Please install it with `pip install llama-index` or with package manager of choice."
22
- )
23
-
24
-
25
- T = TypeVar("T", bound=type)
26
-
27
-
28
- def with_metrics(cls: T) -> T:
29
- class SubClassWithMetric(cls): # type: ignore
30
- metric_collection: Optional[str] = Field(default=None)
31
- metrics: Optional[List[BaseMetric]] = Field(default_factory=list)
32
-
33
- SubClassWithMetric.__name__ = cls.__name__
34
- SubClassWithMetric.__qualname__ = cls.__qualname__
35
- return cast(T, SubClassWithMetric)
36
-
37
-
38
- @with_metrics
39
- class FunctionAgent(FunctionAgent):
40
- def __init__(self, *args, metric_collection=None, metrics=None, **kwargs):
41
- with capture_tracing_integration(
42
- "llama_index.agent.patched.FunctionAgent"
43
- ):
44
- super().__init__(*args, **kwargs)
45
- self.metric_collection = metric_collection
46
- self.metrics = metrics
47
-
48
-
49
- @with_metrics
50
- class ReActAgent(ReActAgent):
51
- def __init__(self, *args, metric_collection=None, metrics=None, **kwargs):
52
- with capture_tracing_integration(
53
- "llama_index.agent.patched.ReActAgent"
54
- ):
55
- super().__init__(*args, **kwargs)
56
- self.metric_collection = metric_collection
57
- self.metrics = metrics
58
-
59
-
60
- @with_metrics
61
- class CodeActAgent(CodeActAgent):
62
- def __init__(self, *args, metric_collection=None, metrics=None, **kwargs):
63
- with capture_tracing_integration(
64
- "llama_index.agent.patched.CodeActAgent"
65
- ):
66
- super().__init__(*args, **kwargs)
67
- self.metric_collection = metric_collection
68
- self.metrics = metrics
@@ -1,10 +0,0 @@
1
- from .messages import TextMessage, ToolCallMessage
2
- from .tools import BaseTool, ToolSchema, ToolOutput
3
-
4
- __all__ = [
5
- "BaseTool",
6
- "TextMessage",
7
- "ToolCallMessage",
8
- "ToolSchema",
9
- "ToolOutput",
10
- ]
@@ -1,6 +0,0 @@
1
- from typing import Literal
2
- from pydantic import BaseModel
3
-
4
-
5
- class BaseMessage(BaseModel):
6
- role: Literal["user", "assistant"]
@@ -1,14 +0,0 @@
1
- from typing import Literal, Dict, Any
2
- from .base import BaseMessage
3
-
4
-
5
- class TextMessage(BaseMessage):
6
- type: Literal["text", "thinking"]
7
- content: str
8
-
9
-
10
- class ToolCallMessage(BaseMessage):
11
- """This is a message for tool calls in response.choices[0].message.tool_calls"""
12
-
13
- name: str
14
- args: Dict[str, Any]
@@ -1,18 +0,0 @@
1
- from typing import Any, Optional, Dict
2
- from pydantic import BaseModel
3
-
4
-
5
- class BaseTool(BaseModel):
6
- name: str
7
- description: Optional[str] = None
8
-
9
-
10
- class ToolSchema(BaseTool):
11
- parameters: Dict[str, Any]
12
- is_called: Optional[bool] = False
13
-
14
-
15
- class ToolOutput(BaseTool):
16
- """Output of the tool function"""
17
-
18
- output: Any