deepeval 3.7.3__py3-none-any.whl → 3.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. deepeval/_version.py +1 -1
  2. deepeval/cli/test.py +1 -1
  3. deepeval/config/settings.py +102 -13
  4. deepeval/evaluate/configs.py +1 -1
  5. deepeval/evaluate/execute.py +4 -1
  6. deepeval/metrics/answer_relevancy/template.py +4 -4
  7. deepeval/metrics/argument_correctness/template.py +2 -2
  8. deepeval/metrics/bias/template.py +3 -3
  9. deepeval/metrics/contextual_precision/template.py +6 -6
  10. deepeval/metrics/contextual_recall/template.py +2 -2
  11. deepeval/metrics/contextual_relevancy/template.py +3 -3
  12. deepeval/metrics/conversation_completeness/template.py +2 -2
  13. deepeval/metrics/conversational_dag/templates.py +4 -4
  14. deepeval/metrics/conversational_g_eval/template.py +4 -3
  15. deepeval/metrics/dag/templates.py +4 -4
  16. deepeval/metrics/faithfulness/template.py +4 -4
  17. deepeval/metrics/hallucination/template.py +4 -4
  18. deepeval/metrics/misuse/template.py +2 -2
  19. deepeval/metrics/multimodal_metrics/multimodal_answer_relevancy/template.py +7 -7
  20. deepeval/metrics/multimodal_metrics/multimodal_contextual_precision/template.py +6 -6
  21. deepeval/metrics/multimodal_metrics/multimodal_contextual_recall/template.py +2 -2
  22. deepeval/metrics/multimodal_metrics/multimodal_contextual_relevancy/template.py +3 -3
  23. deepeval/metrics/multimodal_metrics/multimodal_faithfulness/template.py +9 -9
  24. deepeval/metrics/multimodal_metrics/multimodal_g_eval/template.py +4 -4
  25. deepeval/metrics/non_advice/template.py +2 -2
  26. deepeval/metrics/pii_leakage/template.py +2 -2
  27. deepeval/metrics/prompt_alignment/template.py +4 -4
  28. deepeval/metrics/role_violation/template.py +2 -2
  29. deepeval/metrics/step_efficiency/step_efficiency.py +1 -1
  30. deepeval/metrics/toxicity/template.py +4 -4
  31. deepeval/metrics/turn_relevancy/template.py +2 -2
  32. deepeval/models/embedding_models/azure_embedding_model.py +28 -15
  33. deepeval/models/embedding_models/local_embedding_model.py +23 -10
  34. deepeval/models/embedding_models/ollama_embedding_model.py +8 -6
  35. deepeval/models/embedding_models/openai_embedding_model.py +18 -2
  36. deepeval/models/llms/anthropic_model.py +17 -5
  37. deepeval/models/llms/azure_model.py +30 -18
  38. deepeval/models/llms/deepseek_model.py +22 -12
  39. deepeval/models/llms/gemini_model.py +120 -87
  40. deepeval/models/llms/grok_model.py +23 -16
  41. deepeval/models/llms/kimi_model.py +23 -12
  42. deepeval/models/llms/litellm_model.py +63 -25
  43. deepeval/models/llms/local_model.py +26 -18
  44. deepeval/models/llms/ollama_model.py +17 -7
  45. deepeval/models/llms/openai_model.py +22 -17
  46. deepeval/models/llms/portkey_model.py +132 -0
  47. deepeval/models/mlllms/azure_model.py +28 -19
  48. deepeval/models/mlllms/gemini_model.py +102 -73
  49. deepeval/models/mlllms/ollama_model.py +40 -9
  50. deepeval/models/mlllms/openai_model.py +65 -14
  51. deepeval/models/utils.py +48 -3
  52. deepeval/optimization/__init__.py +13 -0
  53. deepeval/optimization/adapters/__init__.py +2 -0
  54. deepeval/optimization/adapters/deepeval_scoring_adapter.py +588 -0
  55. deepeval/optimization/aggregates.py +14 -0
  56. deepeval/optimization/configs.py +34 -0
  57. deepeval/optimization/copro/configs.py +31 -0
  58. deepeval/optimization/copro/loop.py +837 -0
  59. deepeval/optimization/gepa/__init__.py +7 -0
  60. deepeval/optimization/gepa/configs.py +115 -0
  61. deepeval/optimization/gepa/loop.py +677 -0
  62. deepeval/optimization/miprov2/configs.py +134 -0
  63. deepeval/optimization/miprov2/loop.py +785 -0
  64. deepeval/optimization/mutations/__init__.py +0 -0
  65. deepeval/optimization/mutations/prompt_rewriter.py +458 -0
  66. deepeval/optimization/policies/__init__.py +16 -0
  67. deepeval/optimization/policies/selection.py +166 -0
  68. deepeval/optimization/policies/tie_breaker.py +67 -0
  69. deepeval/optimization/prompt_optimizer.py +462 -0
  70. deepeval/optimization/simba/__init__.py +0 -0
  71. deepeval/optimization/simba/configs.py +33 -0
  72. deepeval/optimization/simba/loop.py +983 -0
  73. deepeval/optimization/simba/types.py +15 -0
  74. deepeval/optimization/types.py +361 -0
  75. deepeval/optimization/utils.py +598 -0
  76. deepeval/prompt/prompt.py +10 -5
  77. deepeval/test_run/cache.py +2 -0
  78. deepeval/test_run/test_run.py +6 -1
  79. deepeval/utils.py +24 -0
  80. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/METADATA +1 -1
  81. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/RECORD +84 -59
  82. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/LICENSE.md +0 -0
  83. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/WHEEL +0 -0
  84. {deepeval-3.7.3.dist-info → deepeval-3.7.4.dist-info}/entry_points.txt +0 -0
deepeval/optimization/miprov2/configs.py (new file)
@@ -0,0 +1,134 @@
+ from __future__ import annotations
+ import time
+ from typing import Optional
+
+ from pydantic import (
+     BaseModel,
+     Field,
+     PositiveInt,
+     conint,
+     confloat,
+     field_validator,
+ )
+
+
+ class MIPROConfig(BaseModel):
+     """
+     Configuration for 0-shot MIPRO style prompt optimization.
+
+     This is adapted to the DeepEval setting where we optimize a single Prompt
+     (instruction) against a list of Goldens, using mini-batch evaluation and a
+     simple surrogate over prompt candidates.
+
+     Fields
+     ------
+     iterations:
+         Total number of optimization trials. Each iteration selects
+         a parent candidate, proposes a child via the PromptRewriter,
+         evaluates it on a mini-batch, and updates the surrogate stats.
+
+     minibatch_size:
+         Fixed minibatch size drawn from the full set of goldens. When set,
+         this overrides dynamic sizing based on `minibatch_ratio`,
+         `minibatch_min_size`, and `minibatch_max_size`.
+
+     minibatch_min_size:
+         Hard lower bound on minibatch size when dynamic sizing is in effect.
+
+     minibatch_max_size:
+         Hard upper bound on minibatch size when dynamic sizing is in effect.
+
+     minibatch_ratio:
+         Target fraction of len(goldens) used to compute a dynamic minibatch
+         size. The final size is bounded between `minibatch_min_size` and
+         `minibatch_max_size`.
+
+     random_seed:
+         RNG seed for reproducibility. If set to None, a seed is derived from
+         time.time_ns() by the validator.
+
+     exploration_probability:
+         Epsilon greedy exploration rate for candidate selection. With this
+         probability the runner picks a random candidate; otherwise it picks
+         the candidate with the highest mean minibatch score.
+
+     full_eval_every:
+         If set, every `full_eval_every` trials the runner fully evaluates the
+         current best candidate (by mean minibatch score) on the full set of
+         goldens, storing scores per-instance. If None, only a final full
+         evaluation is done at the end.
+
+     rewrite_instruction_max_chars:
+         Maximum number of characters pulled into rewrite instructions
+         (prompt text + feedback) when using PromptRewriter.
+
+     min_delta:
+         Minimum improvement on minibatch mean required for a child
+         configuration to be accepted over its parent.
+     """
+
+     iterations: PositiveInt = Field(
+         default=5,
+         description="Total number of MIPRO trials or prompt proposals.",
+     )
+     minibatch_size: Optional[conint(ge=1)] = Field(
+         default=None,
+         description=(
+             "Fixed minibatch size for goldens; when set, overrides dynamic sizing."
+         ),
+     )
+     minibatch_min_size: conint(ge=1) = Field(
+         default=4,
+         description="Hard lower bound on minibatch size.",
+     )
+     minibatch_max_size: PositiveInt = Field(
+         default=32,
+         description="Hard upper bound on minibatch size.",
+     )
+     minibatch_ratio: confloat(gt=0.0, le=1.0) = Field(
+         default=0.05,
+         description=(
+             "Target fraction of len(goldens) used to compute a dynamic minibatch "
+             "size; bounded between minibatch_min_size and minibatch_max_size."
+         ),
+     )
+     random_seed: conint(ge=0) = 0
+     min_delta: confloat(ge=0.0) = Field(
+         default=0.0,
+         description=(
+             "Minimum improvement in minibatch score required for a child "
+             "prompt to be accepted over its parent."
+         ),
+     )
+
+     exploration_probability: confloat(ge=0.0, le=1.0) = Field(
+         default=0.2,
+         description=(
+             "Probability of sampling a random candidate instead of "
+             "the best-by-mean minibatch score."
+         ),
+     )
+
+     full_eval_every: Optional[PositiveInt] = Field(
+         default=5,
+         description=(
+             "If set, the runner fully evaluates the current best candidate on the "
+             "full goldens every N trials. If None, only a single full evaluation "
+             "is performed at the end."
+         ),
+     )
+
+     rewrite_instruction_max_chars: PositiveInt = Field(
+         default=4096,
+         description=(
+             "Maximum number of characters from prompt, feedback, and related "
+             "text included in rewrite instructions."
+         ),
+     )
+
+     @field_validator("random_seed", mode="before")
+     @classmethod
+     def _coerce_random_seed(cls, seed):
+         if seed is None:
+             return time.time_ns()
+         return seed
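
For orientation, here is a minimal usage sketch (not part of this release) showing how the new MIPROConfig above might be constructed, and how the dynamic minibatch sizing and epsilon-greedy selection described in its docstring could be derived from its fields. The dynamic_minibatch_size and select_parent helpers are hypothetical illustrations only; the actual logic lives in deepeval/optimization/miprov2/loop.py, which is not shown in this hunk.

    # Illustrative sketch; the helper functions below are hypothetical, not deepeval API.
    import random

    from deepeval.optimization.miprov2.configs import MIPROConfig

    config = MIPROConfig(
        iterations=10,
        minibatch_ratio=0.1,        # target ~10% of the goldens per trial
        minibatch_min_size=4,
        minibatch_max_size=32,
        exploration_probability=0.2,
        full_eval_every=5,
        random_seed=None,           # validator falls back to time.time_ns()
    )

    def dynamic_minibatch_size(cfg: MIPROConfig, num_goldens: int) -> int:
        # Hypothetical helper mirroring the docstring: a fixed minibatch_size wins;
        # otherwise the ratio-based size is clamped between the min and max bounds.
        if cfg.minibatch_size is not None:
            return min(cfg.minibatch_size, num_goldens)
        size = int(num_goldens * cfg.minibatch_ratio)
        size = max(cfg.minibatch_min_size, min(size, cfg.minibatch_max_size))
        return min(size, num_goldens)

    def select_parent(cfg: MIPROConfig, mean_scores: dict):
        # Hypothetical epsilon-greedy selection over candidates, as described for
        # exploration_probability: explore at random, otherwise exploit the best mean score.
        rng = random.Random(cfg.random_seed)
        if rng.random() < cfg.exploration_probability:
            return rng.choice(list(mean_scores))
        return max(mean_scores, key=mean_scores.get)

    print(dynamic_minibatch_size(config, num_goldens=200))  # -> 20 with the values above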