deepeval 3.7.9__py3-none-any.whl → 3.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/annotation/annotation.py +2 -2
- deepeval/cli/main.py +168 -0
- deepeval/confident/api.py +2 -0
- deepeval/config/settings.py +13 -0
- deepeval/constants.py +1 -0
- deepeval/dataset/dataset.py +6 -4
- deepeval/integrations/langchain/callback.py +330 -158
- deepeval/integrations/langchain/utils.py +31 -8
- deepeval/key_handler.py +8 -1
- deepeval/metrics/contextual_recall/contextual_recall.py +25 -6
- deepeval/metrics/contextual_recall/schema.py +6 -0
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +35 -0
- deepeval/metrics/g_eval/g_eval.py +35 -1
- deepeval/metrics/g_eval/utils.py +65 -0
- deepeval/metrics/multimodal_metrics/image_coherence/image_coherence.py +10 -1
- deepeval/metrics/multimodal_metrics/image_helpfulness/image_helpfulness.py +10 -1
- deepeval/metrics/multimodal_metrics/image_reference/image_reference.py +10 -1
- deepeval/metrics/utils.py +1 -1
- deepeval/models/__init__.py +2 -0
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/amazon_bedrock_model.py +51 -6
- deepeval/models/llms/azure_model.py +33 -7
- deepeval/models/llms/constants.py +23 -0
- deepeval/models/llms/gemini_model.py +6 -1
- deepeval/models/llms/openai_model.py +5 -4
- deepeval/models/llms/openrouter_model.py +398 -0
- deepeval/models/retry_policy.py +3 -0
- deepeval/prompt/api.py +1 -0
- deepeval/prompt/prompt.py +7 -5
- deepeval/test_case/llm_test_case.py +1 -0
- deepeval/tracing/tracing.py +6 -1
- deepeval/tracing/types.py +1 -1
- {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/METADATA +3 -3
- {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/RECORD +38 -37
- {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/WHEEL +0 -0
- {deepeval-3.7.9.dist-info → deepeval-3.8.1.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.7.9"
|
|
1
|
+
__version__: str = "3.8.1"
|
deepeval/annotation/annotation.py
CHANGED
|
@@ -14,7 +14,7 @@ def send_annotation(
|
|
|
14
14
|
explanation: Optional[str] = None,
|
|
15
15
|
user_id: Optional[str] = None,
|
|
16
16
|
type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
|
|
17
|
-
) ->
|
|
17
|
+
) -> None:
|
|
18
18
|
api_annotation = APIAnnotation(
|
|
19
19
|
rating=rating,
|
|
20
20
|
traceUuid=trace_uuid,
|
|
@@ -50,7 +50,7 @@ async def a_send_annotation(
|
|
|
50
50
|
explanation: Optional[str] = None,
|
|
51
51
|
type: Optional[AnnotationType] = AnnotationType.THUMBS_RATING,
|
|
52
52
|
user_id: Optional[str] = None,
|
|
53
|
-
) ->
|
|
53
|
+
) -> None:
|
|
54
54
|
api_annotation = APIAnnotation(
|
|
55
55
|
rating=rating,
|
|
56
56
|
traceUuid=trace_uuid,
|
deepeval/cli/main.py
CHANGED
|
@@ -2937,5 +2937,173 @@ def unset_portkey_model_env(
|
|
|
2937
2937
|
)
|
|
2938
2938
|
|
|
2939
2939
|
|
|
2940
|
+
#############################################
|
|
2941
|
+
# OpenRouter Integration ####################
|
|
2942
|
+
#############################################
|
|
2943
|
+
|
|
2944
|
+
|
|
2945
|
+
@app.command(name="set-openrouter")
|
|
2946
|
+
def set_openrouter_model_env(
|
|
2947
|
+
model: Optional[str] = typer.Option(
|
|
2948
|
+
None,
|
|
2949
|
+
"-m",
|
|
2950
|
+
"--model",
|
|
2951
|
+
help="Model identifier to use for this provider (e.g., `openai/gpt-4.1`).",
|
|
2952
|
+
),
|
|
2953
|
+
prompt_api_key: bool = typer.Option(
|
|
2954
|
+
False,
|
|
2955
|
+
"-k",
|
|
2956
|
+
"--prompt-api-key",
|
|
2957
|
+
help=(
|
|
2958
|
+
"Prompt for OPENROUTER_API_KEY (input hidden). Not suitable for CI. "
|
|
2959
|
+
"If --save (or DEEPEVAL_DEFAULT_SAVE) is used, the key is written to dotenv in plaintext."
|
|
2960
|
+
),
|
|
2961
|
+
),
|
|
2962
|
+
base_url: Optional[str] = typer.Option(
|
|
2963
|
+
None,
|
|
2964
|
+
"-u",
|
|
2965
|
+
"--base-url",
|
|
2966
|
+
help="Override the API endpoint/base URL used by this provider (default: https://openrouter.ai/api/v1).",
|
|
2967
|
+
),
|
|
2968
|
+
temperature: Optional[float] = typer.Option(
|
|
2969
|
+
None,
|
|
2970
|
+
"-t",
|
|
2971
|
+
"--temperature",
|
|
2972
|
+
help="Override the global TEMPERATURE used by LLM providers (e.g., 0.0 for deterministic behavior).",
|
|
2973
|
+
),
|
|
2974
|
+
cost_per_input_token: Optional[float] = typer.Option(
|
|
2975
|
+
None,
|
|
2976
|
+
"-i",
|
|
2977
|
+
"--cost-per-input-token",
|
|
2978
|
+
help=(
|
|
2979
|
+
"USD per input token used for cost tracking. "
|
|
2980
|
+
"If unset and OpenRouter does not return pricing metadata, "
|
|
2981
|
+
"costs will not be calculated."
|
|
2982
|
+
),
|
|
2983
|
+
),
|
|
2984
|
+
cost_per_output_token: Optional[float] = typer.Option(
|
|
2985
|
+
None,
|
|
2986
|
+
"-o",
|
|
2987
|
+
"--cost-per-output-token",
|
|
2988
|
+
help=(
|
|
2989
|
+
"USD per output token used for cost tracking. "
|
|
2990
|
+
"If unset and OpenRouter does not return pricing metadata, "
|
|
2991
|
+
"costs will not be calculated."
|
|
2992
|
+
),
|
|
2993
|
+
),
|
|
2994
|
+
save: Optional[str] = typer.Option(
|
|
2995
|
+
None,
|
|
2996
|
+
"-s",
|
|
2997
|
+
"--save",
|
|
2998
|
+
help="Persist CLI parameters as environment variables in a dotenv file. "
|
|
2999
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
3000
|
+
),
|
|
3001
|
+
quiet: bool = typer.Option(
|
|
3002
|
+
False,
|
|
3003
|
+
"-q",
|
|
3004
|
+
"--quiet",
|
|
3005
|
+
help="Suppress printing to the terminal (useful for CI).",
|
|
3006
|
+
),
|
|
3007
|
+
):
|
|
3008
|
+
api_key = None
|
|
3009
|
+
if prompt_api_key:
|
|
3010
|
+
api_key = coerce_blank_to_none(
|
|
3011
|
+
typer.prompt("OpenRouter API key", hide_input=True)
|
|
3012
|
+
)
|
|
3013
|
+
|
|
3014
|
+
model = coerce_blank_to_none(model)
|
|
3015
|
+
base_url = coerce_blank_to_none(base_url)
|
|
3016
|
+
|
|
3017
|
+
settings = get_settings()
|
|
3018
|
+
with settings.edit(save=save) as edit_ctx:
|
|
3019
|
+
edit_ctx.switch_model_provider(ModelKeyValues.USE_OPENROUTER_MODEL)
|
|
3020
|
+
if model is not None:
|
|
3021
|
+
settings.OPENROUTER_MODEL_NAME = model
|
|
3022
|
+
if api_key is not None:
|
|
3023
|
+
settings.OPENROUTER_API_KEY = api_key
|
|
3024
|
+
if base_url is not None:
|
|
3025
|
+
settings.OPENROUTER_BASE_URL = base_url
|
|
3026
|
+
if temperature is not None:
|
|
3027
|
+
settings.TEMPERATURE = temperature
|
|
3028
|
+
if cost_per_input_token is not None:
|
|
3029
|
+
settings.OPENROUTER_COST_PER_INPUT_TOKEN = cost_per_input_token
|
|
3030
|
+
if cost_per_output_token is not None:
|
|
3031
|
+
settings.OPENROUTER_COST_PER_OUTPUT_TOKEN = cost_per_output_token
|
|
3032
|
+
|
|
3033
|
+
handled, path, updates = edit_ctx.result
|
|
3034
|
+
|
|
3035
|
+
effective_model = settings.OPENROUTER_MODEL_NAME
|
|
3036
|
+
if not effective_model:
|
|
3037
|
+
raise typer.BadParameter(
|
|
3038
|
+
"OpenRouter model name is not set. Pass --model (or set OPENROUTER_MODEL_NAME).",
|
|
3039
|
+
param_hint="--model",
|
|
3040
|
+
)
|
|
3041
|
+
|
|
3042
|
+
_handle_save_result(
|
|
3043
|
+
handled=handled,
|
|
3044
|
+
path=path,
|
|
3045
|
+
updates=updates,
|
|
3046
|
+
save=save,
|
|
3047
|
+
quiet=quiet,
|
|
3048
|
+
success_msg=(
|
|
3049
|
+
f":raising_hands: Congratulations! You're now using OpenRouter `{escape(effective_model)}` for all evals that require an LLM."
|
|
3050
|
+
),
|
|
3051
|
+
)
|
|
3052
|
+
|
|
3053
|
+
|
|
3054
|
+
@app.command(name="unset-openrouter")
|
|
3055
|
+
def unset_openrouter_model_env(
|
|
3056
|
+
save: Optional[str] = typer.Option(
|
|
3057
|
+
None,
|
|
3058
|
+
"-s",
|
|
3059
|
+
"--save",
|
|
3060
|
+
help="Remove only the OpenRouter model related environment variables from a dotenv file. "
|
|
3061
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
3062
|
+
),
|
|
3063
|
+
clear_secrets: bool = typer.Option(
|
|
3064
|
+
False,
|
|
3065
|
+
"-x",
|
|
3066
|
+
"--clear-secrets",
|
|
3067
|
+
help="Also remove OPENROUTER_API_KEY from the dotenv store.",
|
|
3068
|
+
),
|
|
3069
|
+
quiet: bool = typer.Option(
|
|
3070
|
+
False,
|
|
3071
|
+
"-q",
|
|
3072
|
+
"--quiet",
|
|
3073
|
+
help="Suppress printing to the terminal (useful for CI).",
|
|
3074
|
+
),
|
|
3075
|
+
):
|
|
3076
|
+
settings = get_settings()
|
|
3077
|
+
with settings.edit(save=save) as edit_ctx:
|
|
3078
|
+
settings.USE_OPENROUTER_MODEL = None
|
|
3079
|
+
settings.OPENROUTER_MODEL_NAME = None
|
|
3080
|
+
settings.OPENROUTER_BASE_URL = None
|
|
3081
|
+
settings.OPENROUTER_COST_PER_INPUT_TOKEN = None
|
|
3082
|
+
settings.OPENROUTER_COST_PER_OUTPUT_TOKEN = None
|
|
3083
|
+
# Intentionally do NOT touch TEMPERATURE here; it's a global dial.
|
|
3084
|
+
if clear_secrets:
|
|
3085
|
+
settings.OPENROUTER_API_KEY = None
|
|
3086
|
+
|
|
3087
|
+
handled, path, updates = edit_ctx.result
|
|
3088
|
+
|
|
3089
|
+
if _handle_save_result(
|
|
3090
|
+
handled=handled,
|
|
3091
|
+
path=path,
|
|
3092
|
+
updates=updates,
|
|
3093
|
+
save=save,
|
|
3094
|
+
quiet=quiet,
|
|
3095
|
+
updated_msg="Removed OpenRouter model environment variables from {path}.",
|
|
3096
|
+
tip_msg=None,
|
|
3097
|
+
):
|
|
3098
|
+
if is_openai_configured():
|
|
3099
|
+
print(
|
|
3100
|
+
":raised_hands: OpenAI will still be used by default because OPENAI_API_KEY is set."
|
|
3101
|
+
)
|
|
3102
|
+
else:
|
|
3103
|
+
print(
|
|
3104
|
+
"The OpenRouter model configuration has been removed. No model is currently configured, but you can set one with the CLI or add credentials to .env[.local]."
|
|
3105
|
+
)
|
|
3106
|
+
|
|
3107
|
+
|
|
2940
3108
|
if __name__ == "__main__":
|
|
2941
3109
|
app()
|
deepeval/confident/api.py
CHANGED
|
@@ -106,6 +106,8 @@ class Endpoints(Enum):
|
|
|
106
106
|
EVALUATE_TRACE_ENDPOINT = "/v1/evaluate/traces/:traceUuid"
|
|
107
107
|
EVALUATE_SPAN_ENDPOINT = "/v1/evaluate/spans/:spanUuid"
|
|
108
108
|
|
|
109
|
+
METRICS_ENDPOINT = "/v1/metrics"
|
|
110
|
+
|
|
109
111
|
|
|
110
112
|
class Api:
|
|
111
113
|
def __init__(self, api_key: Optional[str] = None):
|
deepeval/config/settings.py
CHANGED
|
@@ -447,6 +447,9 @@ class Settings(BaseSettings):
|
|
|
447
447
|
AZURE_OPENAI_API_KEY: Optional[SecretStr] = Field(
|
|
448
448
|
None, description="Azure OpenAI API key."
|
|
449
449
|
)
|
|
450
|
+
AZURE_OPENAI_AD_TOKEN: Optional[SecretStr] = Field(
|
|
451
|
+
None, description="Azure OpenAI Ad Token."
|
|
452
|
+
)
|
|
450
453
|
AZURE_OPENAI_ENDPOINT: Optional[AnyUrl] = Field(
|
|
451
454
|
None, description="Azure OpenAI endpoint URL."
|
|
452
455
|
)
|
|
@@ -627,6 +630,16 @@ class Settings(BaseSettings):
|
|
|
627
630
|
PORTKEY_PROVIDER_NAME: Optional[str] = Field(
|
|
628
631
|
None, description="Provider name/routing hint for Portkey."
|
|
629
632
|
)
|
|
633
|
+
# OpenRouter
|
|
634
|
+
USE_OPENROUTER_MODEL: Optional[bool] = None
|
|
635
|
+
OPENROUTER_API_KEY: Optional[SecretStr] = None
|
|
636
|
+
OPENROUTER_MODEL_NAME: Optional[str] = None
|
|
637
|
+
OPENROUTER_COST_PER_INPUT_TOKEN: Optional[float] = None
|
|
638
|
+
OPENROUTER_COST_PER_OUTPUT_TOKEN: Optional[float] = None
|
|
639
|
+
OPENROUTER_BASE_URL: Optional[AnyUrl] = Field(
|
|
640
|
+
None, description="OpenRouter base URL (if using a custom endpoint)."
|
|
641
|
+
)
|
|
642
|
+
|
|
630
643
|
# Vertex AI
|
|
631
644
|
VERTEX_AI_MODEL_NAME: Optional[str] = Field(
|
|
632
645
|
None,
|
deepeval/constants.py
CHANGED
deepeval/dataset/dataset.py
CHANGED
|
@@ -84,9 +84,11 @@ class EvaluationDataset:
|
|
|
84
84
|
def __init__(
|
|
85
85
|
self,
|
|
86
86
|
goldens: Union[List[Golden], List[ConversationalGolden]] = [],
|
|
87
|
+
confident_api_key: Optional[str] = None,
|
|
87
88
|
):
|
|
88
89
|
self._alias = None
|
|
89
90
|
self._id = None
|
|
91
|
+
self.confident_api_key = confident_api_key
|
|
90
92
|
if len(goldens) > 0:
|
|
91
93
|
self._multi_turn = (
|
|
92
94
|
True if isinstance(goldens[0], ConversationalGolden) else False
|
|
@@ -722,7 +724,7 @@ class EvaluationDataset:
|
|
|
722
724
|
"Unable to push empty dataset to Confident AI, there must be at least one golden in dataset."
|
|
723
725
|
)
|
|
724
726
|
|
|
725
|
-
api = Api()
|
|
727
|
+
api = Api(api_key=self.confident_api_key)
|
|
726
728
|
api_dataset = APIDataset(
|
|
727
729
|
goldens=self.goldens if not self._multi_turn else None,
|
|
728
730
|
conversationalGoldens=(self.goldens if self._multi_turn else None),
|
|
@@ -755,7 +757,7 @@ class EvaluationDataset:
|
|
|
755
757
|
auto_convert_goldens_to_test_cases: bool = False,
|
|
756
758
|
public: bool = False,
|
|
757
759
|
):
|
|
758
|
-
api = Api()
|
|
760
|
+
api = Api(api_key=self.confident_api_key)
|
|
759
761
|
with capture_pull_dataset():
|
|
760
762
|
with Progress(
|
|
761
763
|
SpinnerColumn(style="rgb(106,0,255)"),
|
|
@@ -839,7 +841,7 @@ class EvaluationDataset:
|
|
|
839
841
|
raise ValueError(
|
|
840
842
|
f"Can't queue empty list of goldens to dataset with alias: {alias} on Confident AI."
|
|
841
843
|
)
|
|
842
|
-
api = Api()
|
|
844
|
+
api = Api(api_key=self.confident_api_key)
|
|
843
845
|
|
|
844
846
|
multi_turn = isinstance(goldens[0], ConversationalGolden)
|
|
845
847
|
|
|
@@ -871,7 +873,7 @@ class EvaluationDataset:
|
|
|
871
873
|
self,
|
|
872
874
|
alias: str,
|
|
873
875
|
):
|
|
874
|
-
api = Api()
|
|
876
|
+
api = Api(api_key=self.confident_api_key)
|
|
875
877
|
api.send_request(
|
|
876
878
|
method=HttpMethods.DELETE,
|
|
877
879
|
endpoint=Endpoints.DATASET_ALIAS_ENDPOINT,
|