deepeval 3.7.8__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepeval/_version.py +1 -1
- deepeval/benchmarks/drop/drop.py +5 -2
- deepeval/benchmarks/mmlu/mmlu.py +6 -4
- deepeval/cli/main.py +168 -0
- deepeval/cli/utils.py +2 -2
- deepeval/confident/api.py +2 -0
- deepeval/config/settings.py +10 -0
- deepeval/constants.py +1 -0
- deepeval/integrations/langchain/callback.py +330 -158
- deepeval/integrations/langchain/utils.py +31 -8
- deepeval/key_handler.py +8 -1
- deepeval/metrics/conversational_g_eval/conversational_g_eval.py +35 -0
- deepeval/metrics/g_eval/g_eval.py +35 -1
- deepeval/metrics/g_eval/utils.py +65 -0
- deepeval/models/__init__.py +2 -0
- deepeval/models/llms/__init__.py +2 -0
- deepeval/models/llms/constants.py +23 -0
- deepeval/models/llms/gemini_model.py +27 -29
- deepeval/models/llms/openai_model.py +5 -4
- deepeval/models/llms/openrouter_model.py +398 -0
- deepeval/models/retry_policy.py +3 -0
- deepeval/prompt/api.py +1 -0
- deepeval/synthesizer/synthesizer.py +190 -82
- deepeval/tracing/tracing.py +6 -1
- deepeval/tracing/types.py +1 -1
- deepeval/utils.py +21 -6
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/METADATA +7 -7
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/RECORD +31 -30
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/LICENSE.md +0 -0
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/WHEEL +0 -0
- {deepeval-3.7.8.dist-info → deepeval-3.8.0.dist-info}/entry_points.txt +0 -0
deepeval/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__: str = "3.7.8"
|
|
1
|
+
__version__: str = "3.8.0"
|
deepeval/benchmarks/drop/drop.py
CHANGED
|
@@ -279,8 +279,11 @@ class DROP(DeepEvalBaseBenchmark):
|
|
|
279
279
|
prediction = predictions[i]
|
|
280
280
|
golden = goldens[i]
|
|
281
281
|
# Define Metric
|
|
282
|
-
|
|
283
|
-
golden.expected_output,
|
|
282
|
+
expected_output = DROPTemplate.parse_str_to_list(
|
|
283
|
+
golden.expected_output, DELIMITER
|
|
284
|
+
)
|
|
285
|
+
score = self.scorer.quasi_contains_score(
|
|
286
|
+
expected_output, prediction
|
|
284
287
|
)
|
|
285
288
|
res.append({"prediction": prediction, "score": score})
|
|
286
289
|
|
deepeval/benchmarks/mmlu/mmlu.py
CHANGED
|
@@ -224,10 +224,12 @@ class MMLU(DeepEvalBaseBenchmark):
|
|
|
224
224
|
responses: List[MultipleChoiceSchema] = model.batch_generate(
|
|
225
225
|
prompts=prompts, schemas=[MultipleChoiceSchema for i in prompts]
|
|
226
226
|
)
|
|
227
|
-
if isinstance(responses,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
227
|
+
if not isinstance(responses, list):
|
|
228
|
+
raise TypeError(
|
|
229
|
+
"batch_generate must return List[MultipleChoiceSchema]"
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
predictions = [res.answer for res in responses]
|
|
231
233
|
except TypeError:
|
|
232
234
|
prompts = [
|
|
233
235
|
prompt
|
deepeval/cli/main.py
CHANGED
|
@@ -2937,5 +2937,173 @@ def unset_portkey_model_env(
|
|
|
2937
2937
|
)
|
|
2938
2938
|
|
|
2939
2939
|
|
|
2940
|
+
#############################################
|
|
2941
|
+
# OpenRouter Integration ####################
|
|
2942
|
+
#############################################
|
|
2943
|
+
|
|
2944
|
+
|
|
2945
|
+
@app.command(name="set-openrouter")
|
|
2946
|
+
def set_openrouter_model_env(
|
|
2947
|
+
model: Optional[str] = typer.Option(
|
|
2948
|
+
None,
|
|
2949
|
+
"-m",
|
|
2950
|
+
"--model",
|
|
2951
|
+
help="Model identifier to use for this provider (e.g., `openai/gpt-4.1`).",
|
|
2952
|
+
),
|
|
2953
|
+
prompt_api_key: bool = typer.Option(
|
|
2954
|
+
False,
|
|
2955
|
+
"-k",
|
|
2956
|
+
"--prompt-api-key",
|
|
2957
|
+
help=(
|
|
2958
|
+
"Prompt for OPENROUTER_API_KEY (input hidden). Not suitable for CI. "
|
|
2959
|
+
"If --save (or DEEPEVAL_DEFAULT_SAVE) is used, the key is written to dotenv in plaintext."
|
|
2960
|
+
),
|
|
2961
|
+
),
|
|
2962
|
+
base_url: Optional[str] = typer.Option(
|
|
2963
|
+
None,
|
|
2964
|
+
"-u",
|
|
2965
|
+
"--base-url",
|
|
2966
|
+
help="Override the API endpoint/base URL used by this provider (default: https://openrouter.ai/api/v1).",
|
|
2967
|
+
),
|
|
2968
|
+
temperature: Optional[float] = typer.Option(
|
|
2969
|
+
None,
|
|
2970
|
+
"-t",
|
|
2971
|
+
"--temperature",
|
|
2972
|
+
help="Override the global TEMPERATURE used by LLM providers (e.g., 0.0 for deterministic behavior).",
|
|
2973
|
+
),
|
|
2974
|
+
cost_per_input_token: Optional[float] = typer.Option(
|
|
2975
|
+
None,
|
|
2976
|
+
"-i",
|
|
2977
|
+
"--cost-per-input-token",
|
|
2978
|
+
help=(
|
|
2979
|
+
"USD per input token used for cost tracking. "
|
|
2980
|
+
"If unset and OpenRouter does not return pricing metadata, "
|
|
2981
|
+
"costs will not be calculated."
|
|
2982
|
+
),
|
|
2983
|
+
),
|
|
2984
|
+
cost_per_output_token: Optional[float] = typer.Option(
|
|
2985
|
+
None,
|
|
2986
|
+
"-o",
|
|
2987
|
+
"--cost-per-output-token",
|
|
2988
|
+
help=(
|
|
2989
|
+
"USD per output token used for cost tracking. "
|
|
2990
|
+
"If unset and OpenRouter does not return pricing metadata, "
|
|
2991
|
+
"costs will not be calculated."
|
|
2992
|
+
),
|
|
2993
|
+
),
|
|
2994
|
+
save: Optional[str] = typer.Option(
|
|
2995
|
+
None,
|
|
2996
|
+
"-s",
|
|
2997
|
+
"--save",
|
|
2998
|
+
help="Persist CLI parameters as environment variables in a dotenv file. "
|
|
2999
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
3000
|
+
),
|
|
3001
|
+
quiet: bool = typer.Option(
|
|
3002
|
+
False,
|
|
3003
|
+
"-q",
|
|
3004
|
+
"--quiet",
|
|
3005
|
+
help="Suppress printing to the terminal (useful for CI).",
|
|
3006
|
+
),
|
|
3007
|
+
):
|
|
3008
|
+
api_key = None
|
|
3009
|
+
if prompt_api_key:
|
|
3010
|
+
api_key = coerce_blank_to_none(
|
|
3011
|
+
typer.prompt("OpenRouter API key", hide_input=True)
|
|
3012
|
+
)
|
|
3013
|
+
|
|
3014
|
+
model = coerce_blank_to_none(model)
|
|
3015
|
+
base_url = coerce_blank_to_none(base_url)
|
|
3016
|
+
|
|
3017
|
+
settings = get_settings()
|
|
3018
|
+
with settings.edit(save=save) as edit_ctx:
|
|
3019
|
+
edit_ctx.switch_model_provider(ModelKeyValues.USE_OPENROUTER_MODEL)
|
|
3020
|
+
if model is not None:
|
|
3021
|
+
settings.OPENROUTER_MODEL_NAME = model
|
|
3022
|
+
if api_key is not None:
|
|
3023
|
+
settings.OPENROUTER_API_KEY = api_key
|
|
3024
|
+
if base_url is not None:
|
|
3025
|
+
settings.OPENROUTER_BASE_URL = base_url
|
|
3026
|
+
if temperature is not None:
|
|
3027
|
+
settings.TEMPERATURE = temperature
|
|
3028
|
+
if cost_per_input_token is not None:
|
|
3029
|
+
settings.OPENROUTER_COST_PER_INPUT_TOKEN = cost_per_input_token
|
|
3030
|
+
if cost_per_output_token is not None:
|
|
3031
|
+
settings.OPENROUTER_COST_PER_OUTPUT_TOKEN = cost_per_output_token
|
|
3032
|
+
|
|
3033
|
+
handled, path, updates = edit_ctx.result
|
|
3034
|
+
|
|
3035
|
+
effective_model = settings.OPENROUTER_MODEL_NAME
|
|
3036
|
+
if not effective_model:
|
|
3037
|
+
raise typer.BadParameter(
|
|
3038
|
+
"OpenRouter model name is not set. Pass --model (or set OPENROUTER_MODEL_NAME).",
|
|
3039
|
+
param_hint="--model",
|
|
3040
|
+
)
|
|
3041
|
+
|
|
3042
|
+
_handle_save_result(
|
|
3043
|
+
handled=handled,
|
|
3044
|
+
path=path,
|
|
3045
|
+
updates=updates,
|
|
3046
|
+
save=save,
|
|
3047
|
+
quiet=quiet,
|
|
3048
|
+
success_msg=(
|
|
3049
|
+
f":raising_hands: Congratulations! You're now using OpenRouter `{escape(effective_model)}` for all evals that require an LLM."
|
|
3050
|
+
),
|
|
3051
|
+
)
|
|
3052
|
+
|
|
3053
|
+
|
|
3054
|
+
@app.command(name="unset-openrouter")
|
|
3055
|
+
def unset_openrouter_model_env(
|
|
3056
|
+
save: Optional[str] = typer.Option(
|
|
3057
|
+
None,
|
|
3058
|
+
"-s",
|
|
3059
|
+
"--save",
|
|
3060
|
+
help="Remove only the OpenRouter model related environment variables from a dotenv file. "
|
|
3061
|
+
"Usage: --save=dotenv[:path] (default: .env.local)",
|
|
3062
|
+
),
|
|
3063
|
+
clear_secrets: bool = typer.Option(
|
|
3064
|
+
False,
|
|
3065
|
+
"-x",
|
|
3066
|
+
"--clear-secrets",
|
|
3067
|
+
help="Also remove OPENROUTER_API_KEY from the dotenv store.",
|
|
3068
|
+
),
|
|
3069
|
+
quiet: bool = typer.Option(
|
|
3070
|
+
False,
|
|
3071
|
+
"-q",
|
|
3072
|
+
"--quiet",
|
|
3073
|
+
help="Suppress printing to the terminal (useful for CI).",
|
|
3074
|
+
),
|
|
3075
|
+
):
|
|
3076
|
+
settings = get_settings()
|
|
3077
|
+
with settings.edit(save=save) as edit_ctx:
|
|
3078
|
+
settings.USE_OPENROUTER_MODEL = None
|
|
3079
|
+
settings.OPENROUTER_MODEL_NAME = None
|
|
3080
|
+
settings.OPENROUTER_BASE_URL = None
|
|
3081
|
+
settings.OPENROUTER_COST_PER_INPUT_TOKEN = None
|
|
3082
|
+
settings.OPENROUTER_COST_PER_OUTPUT_TOKEN = None
|
|
3083
|
+
# Intentionally do NOT touch TEMPERATURE here; it's a global dial.
|
|
3084
|
+
if clear_secrets:
|
|
3085
|
+
settings.OPENROUTER_API_KEY = None
|
|
3086
|
+
|
|
3087
|
+
handled, path, updates = edit_ctx.result
|
|
3088
|
+
|
|
3089
|
+
if _handle_save_result(
|
|
3090
|
+
handled=handled,
|
|
3091
|
+
path=path,
|
|
3092
|
+
updates=updates,
|
|
3093
|
+
save=save,
|
|
3094
|
+
quiet=quiet,
|
|
3095
|
+
updated_msg="Removed OpenRouter model environment variables from {path}.",
|
|
3096
|
+
tip_msg=None,
|
|
3097
|
+
):
|
|
3098
|
+
if is_openai_configured():
|
|
3099
|
+
print(
|
|
3100
|
+
":raised_hands: OpenAI will still be used by default because OPENAI_API_KEY is set."
|
|
3101
|
+
)
|
|
3102
|
+
else:
|
|
3103
|
+
print(
|
|
3104
|
+
"The OpenRouter model configuration has been removed. No model is currently configured, but you can set one with the CLI or add credentials to .env[.local]."
|
|
3105
|
+
)
|
|
3106
|
+
|
|
3107
|
+
|
|
2940
3108
|
if __name__ == "__main__":
|
|
2941
3109
|
app()
|
deepeval/cli/utils.py
CHANGED
|
@@ -52,10 +52,10 @@ USE_EMBED_KEYS = [
|
|
|
52
52
|
|
|
53
53
|
def render_login_message():
|
|
54
54
|
print(
|
|
55
|
-
"🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the
|
|
55
|
+
"🥳 Welcome to [rgb(106,0,255)]Confident AI[/rgb(106,0,255)], the evals cloud platform 🏡❤️"
|
|
56
56
|
)
|
|
57
57
|
print("")
|
|
58
|
-
print(pyfiglet.Figlet(font="big_money-ne").renderText("
|
|
58
|
+
print(pyfiglet.Figlet(font="big_money-ne").renderText("Confident AI"))
|
|
59
59
|
|
|
60
60
|
|
|
61
61
|
def upload_and_open_link(_span: Span):
|
deepeval/confident/api.py
CHANGED
|
@@ -106,6 +106,8 @@ class Endpoints(Enum):
|
|
|
106
106
|
EVALUATE_TRACE_ENDPOINT = "/v1/evaluate/traces/:traceUuid"
|
|
107
107
|
EVALUATE_SPAN_ENDPOINT = "/v1/evaluate/spans/:spanUuid"
|
|
108
108
|
|
|
109
|
+
METRICS_ENDPOINT = "/v1/metrics"
|
|
110
|
+
|
|
109
111
|
|
|
110
112
|
class Api:
|
|
111
113
|
def __init__(self, api_key: Optional[str] = None):
|
deepeval/config/settings.py
CHANGED
|
@@ -627,6 +627,16 @@ class Settings(BaseSettings):
|
|
|
627
627
|
PORTKEY_PROVIDER_NAME: Optional[str] = Field(
|
|
628
628
|
None, description="Provider name/routing hint for Portkey."
|
|
629
629
|
)
|
|
630
|
+
# OpenRouter
|
|
631
|
+
USE_OPENROUTER_MODEL: Optional[bool] = None
|
|
632
|
+
OPENROUTER_API_KEY: Optional[SecretStr] = None
|
|
633
|
+
OPENROUTER_MODEL_NAME: Optional[str] = None
|
|
634
|
+
OPENROUTER_COST_PER_INPUT_TOKEN: Optional[float] = None
|
|
635
|
+
OPENROUTER_COST_PER_OUTPUT_TOKEN: Optional[float] = None
|
|
636
|
+
OPENROUTER_BASE_URL: Optional[AnyUrl] = Field(
|
|
637
|
+
None, description="OpenRouter base URL (if using a custom endpoint)."
|
|
638
|
+
)
|
|
639
|
+
|
|
630
640
|
# Vertex AI
|
|
631
641
|
VERTEX_AI_MODEL_NAME: Optional[str] = Field(
|
|
632
642
|
None,
|